//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "ARMISelLowering.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
STATISTIC(NumConstpoolPromoted,
          "Number of constants with their storage promoted into constant pools");

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));

static cl::opt<bool> EnableConstpoolPromotion(
    "arm-promote-constant", cl::Hidden,
    cl::desc("Enable / disable promotion of unnamed_addr constants into "
             "constant pools"),
    cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
static cl::opt<unsigned> ConstpoolPromotionMaxSize(
    "arm-promote-constant-max-size", cl::Hidden,
    cl::desc("Maximum size of constant to promote into a constant pool"),
    cl::init(64));
static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
    "arm-promote-constant-max-total", cl::Hidden,
    cl::desc("Maximum size of ALL constants to promote into a constant pool"),
    cl::init(128));

// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
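
// Illustrative note (not in the original source): with these four registers,
// a call such as
//   int f(int a, int b, int c, int d, int e);
// passes a..d in r0..r3 and spills e to the caller's outgoing argument area;
// the byval-splitting logic in LowerCall below relies on exactly this layout.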

void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
                                       MVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT, Promote);
    AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);

    setOperationAction(ISD::STORE, VT, Promote);
    AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
  }

  MVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::f64)
    setOperationAction(ISD::SETCC, VT, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
  if (ElemTy == MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
  setOperationAction(ISD::SELECT, VT, Expand);
  setOperationAction(ISD::SELECT_CC, VT, Expand);
  setOperationAction(ISD::VSELECT, VT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT, Custom);
    setOperationAction(ISD::SRA, VT, Custom);
    setOperationAction(ISD::SRL, VT, Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT, Promote);
    AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
    setOperationAction(ISD::OR,  VT, Promote);
    AddPromotedToType (ISD::OR,  VT, PromotedBitwiseVT);
    setOperationAction(ISD::XOR, VT, Promote);
    AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT, Expand);
  setOperationAction(ISD::UDIV, VT, Expand);
  setOperationAction(ISD::FDIV, VT, Expand);
  setOperationAction(ISD::SREM, VT, Expand);
  setOperationAction(ISD::UREM, VT, Expand);
  setOperationAction(ISD::FREM, VT, Expand);

  if (!VT.isFloatingPoint() &&
      VT != MVT::v2i64 && VT != MVT::v1i64)
    for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
      setOperationAction(Opcode, VT, Legal);
}
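
// What the promotion in addTypeForNEON buys (illustrative sketch, not from
// the original file): a bitwise op on a narrow integer vector is rewritten in
// terms of the wider type sharing the same 64-bit D register, e.g.
//   (v8i8 (and a, b)) -> (bitcast (v2i32 (and (bitcast a), (bitcast b))))
// so only one set of patterns is needed per register width.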

void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPRRegClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPairRegClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
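
// Register picture assumed by the two helpers above (illustrative): 64-bit
// vector types such as v8i8 or v2i32 live in one D register (DPRRegClass),
// while 128-bit types such as v4i32 occupy a pair of consecutive D registers
// viewed as one Q register (DPairRegClass).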

ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
                                     const ARMSubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  RegInfo = Subtarget->getRegisterInfo();
  Itins = Subtarget->getInstrItineraryData();

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
      !Subtarget->isTargetWatchOS()) {
    bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
    for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
      setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
                            IsHFTarget ? CallingConv::ARM_AAPCS_VFP
                                       : CallingConv::ARM_AAPCS);
  }

  if (Subtarget->isTargetMachO()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
        Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
      static const struct {
        const RTLIB::Libcall Op;
        const char * const Name;
        const ISD::CondCode Cond;
      } LibraryCalls[] = {
        // Single-precision floating-point arithmetic.
        { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
        { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
        { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
        { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },

        // Double-precision floating-point arithmetic.
        { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
        { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
        { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
        { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },

        // Single-precision comparisons.
        { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
        { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
        { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
        { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
        { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
        { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
        { RTLIB::UO_F32,  "__unordsf2vfp", ISD::SETNE },
        { RTLIB::O_F32,   "__unordsf2vfp", ISD::SETEQ },

        // Double-precision comparisons.
        { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
        { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
        { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
        { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
        { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
        { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
        { RTLIB::UO_F64,  "__unorddf2vfp", ISD::SETNE },
        { RTLIB::O_F64,   "__unorddf2vfp", ISD::SETEQ },

        // Floating-point to integer conversions.
        // i64 conversions are done via library routines even when generating VFP
        // instructions, so use the same ones.
        { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
        { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
        { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
        { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },

        // Conversions between floating types.
        { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
        { RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp", ISD::SETCC_INVALID },

        // Integer to floating-point conversions.
        // i64 conversions are done via library routines even when generating VFP
        // instructions, so use the same ones.
        // FIXME: There appears to be some naming inconsistency in ARM libgcc:
        // e.g., __floatunsidf vs. __floatunssidfvfp.
        { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
        { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
        { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
        { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
      };

      for (const auto &LC : LibraryCalls) {
        setLibcallName(LC.Op, LC.Name);
        if (LC.Cond != ISD::SETCC_INVALID)
          setCmpLibcallCC(LC.Op, LC.Cond);
      }
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, nullptr);
  setLibcallName(RTLIB::SRL_I128, nullptr);
  setLibcallName(RTLIB::SRA_I128, nullptr);

  // RTLIB
  if (Subtarget->isAAPCS_ABI() &&
      (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
       Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
    static const struct {
      const RTLIB::Libcall Op;
      const char * const Name;
      const CallingConv::ID CC;
      const ISD::CondCode Cond;
    } LibraryCalls[] = {
      // Double-precision floating-point arithmetic helper functions
      // RTABI chapter 4.1.2, Table 2
      { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Double-precision floating-point comparison helper functions
      // RTABI chapter 4.1.2, Table 3
      { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
      { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UO_F64,  "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::O_F64,   "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },

      // Single-precision floating-point arithmetic helper functions
      // RTABI chapter 4.1.2, Table 4
      { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Single-precision floating-point comparison helper functions
      // RTABI chapter 4.1.2, Table 5
      { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
      { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UO_F32,  "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::O_F32,   "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },

      // Floating-point to integer conversions.
      // RTABI chapter 4.1.2, Table 6
      { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Conversions between floating types.
      // RTABI chapter 4.1.2, Table 7
      { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPEXT_F32_F64,   "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Integer to floating-point conversions.
      // RTABI chapter 4.1.2, Table 8
      { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Long long helper functions
      // RTABI chapter 4.2, Table 9
      { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Integer division functions
      // RTABI chapter 4.3.1
      { RTLIB::SDIV_I8,  "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SDIV_I16, "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SDIV_I32, "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SDIV_I64, "__aeabi_ldivmod",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I8,  "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I16, "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I32, "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
    };

    for (const auto &LC : LibraryCalls) {
      setLibcallName(LC.Op, LC.Name);
      setLibcallCallingConv(LC.Op, LC.CC);
      if (LC.Cond != ISD::SETCC_INVALID)
        setCmpLibcallCC(LC.Op, LC.Cond);
    }

    // EABI dependent RTLIB
    if (TM.Options.EABIVersion == EABI::EABI4 ||
        TM.Options.EABIVersion == EABI::EABI5) {
      static const struct {
        const RTLIB::Libcall Op;
        const char *const Name;
        const CallingConv::ID CC;
        const ISD::CondCode Cond;
      } MemOpsLibraryCalls[] = {
        // Memory operations
        // RTABI chapter 4.3.4
        { RTLIB::MEMCPY,  "__aeabi_memcpy",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
        { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
        { RTLIB::MEMSET,  "__aeabi_memset",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      };

      for (const auto &LC : MemOpsLibraryCalls) {
        setLibcallName(LC.Op, LC.Name);
        setLibcallCallingConv(LC.Op, LC.CC);
        if (LC.Cond != ISD::SETCC_INVALID)
          setCmpLibcallCC(LC.Op, LC.Cond);
      }
    }
  }

  if (Subtarget->isTargetWindows()) {
    static const struct {
      const RTLIB::Libcall Op;
      const char * const Name;
      const CallingConv::ID CC;
    } LibraryCalls[] = {
      { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
    };

    for (const auto &LC : LibraryCalls) {
      setLibcallName(LC.Op, LC.Name);
      setLibcallCallingConv(LC.Op, LC.CC);
    }
  }

  // Use divmod compiler-rt calls for iOS 5.0 and later.
  if (Subtarget->isTargetMachO() &&
      !(Subtarget->isTargetIOS() &&
        Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
  }

  // The half <-> float conversion functions are always soft-float on
  // non-watchos platforms, but are needed for some targets which use a
  // hard-float calling convention by default.
  if (!Subtarget->isTargetWatchABI()) {
    if (Subtarget->isAAPCS_ABI()) {
      setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
      setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
      setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
    } else {
      setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
      setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
      setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
    }
  }

  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
  // a __gnu_ prefix (which is the default).
  if (Subtarget->isTargetAEABI()) {
    static const struct {
      const RTLIB::Libcall Op;
      const char * const Name;
      const CallingConv::ID CC;
    } LibraryCalls[] = {
      { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
      { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
      { RTLIB::FPEXT_F16_F32,   "__aeabi_h2f", CallingConv::ARM_AAPCS },
    };

    for (const auto &LC : LibraryCalls) {
      setLibcallName(LC.Op, LC.Name);
      setLibcallCallingConv(LC.Op, LC.CC);
    }
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
  else
    addRegisterClass(MVT::i32, &ARM::GPRRegClass);

  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, &ARM::SPRRegClass);
    addRegisterClass(MVT::f64, &ARM::DPRRegClass);
  }

  if (Subtarget->hasFullFP16()) {
    addRegisterClass(MVT::f16, &ARM::HPRRegClass);
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f16, Custom);

    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
  }

  for (MVT VT : MVT::vector_valuetypes()) {
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    setOperationAction(ISD::BSWAP, VT, Expand);
  }

  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    if (Subtarget->hasFullFP16()) {
      addQRTypeForNEON(MVT::v8f16);
      addDRTypeForNEON(MVT::v4f16);
    }

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
    // supported for v4f32.

    // FIXME: Code duplication: FDIV and FREM are expanded always, see
    // ARMTargetLowering::addTypeForNEON method for details.

    // FIXME: Create unittest.
    // In other words, find a way to detect when "copysign" appears in the DAG
    // with vector operands.

    // FIXME: Code duplication: SETCC has custom operation action, see
    // ARMTargetLowering::addTypeForNEON method for details.

    // FIXME: Create unittest for FNEG and for FABS.

    // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.

    // Mark v2f32 intrinsics.

    // Neon does not support some operations on v1i64 and v2i64 types.

    // Custom handling for some quad-vector types to detect VMULL.

    // Custom handling for some vector types to avoid expensive expansions

    // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
    // a destination type that is wider than the source, and nor does
    // it have a FP_TO_[SU]INT instruction with a narrower destination than
    // source.

    // NEON does not have single instruction CTPOP for vectors with element
    // types wider than 8-bits. However, custom lowering can leverage the
    // v8i8/v16i8 vcnt instruction.

    // NEON does not have single instruction CTTZ for vectors.

    // NEON only has FMA instructions as of VFP4.
    if (!Subtarget->hasVFP4()) {
      setOperationAction(ISD::FMA, MVT::v2f32, Expand);
      setOperationAction(ISD::FMA, MVT::v4f32, Expand);
    }

    // It is legal to extload from v4i8 to v4i16 or v4i32.
    for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
                   MVT::v2i32}) {
      for (MVT VT : MVT::integer_vector_valuetypes()) {
        setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
        setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
        setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
      }
    }
  }

  if (Subtarget->isFPOnlySP()) {
    // When targeting a floating-point unit with only single-precision
    // operations, f64 is legal for the few double-precision instructions
    // which are present. However, no double-precision operations other than
    // moves, loads and stores are provided by the hardware.
  }

  // ARM does not have floating-point extending loads.
  for (MVT VT : MVT::fp_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
  }

  // ... or truncating stores
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);

  // ARM does not have i1 sign extending load.
  for (MVT VT : MVT::integer_valuetypes())
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im,  MVT::i1,  Legal);
      setIndexedLoadAction(im,  MVT::i8,  Legal);
      setIndexedLoadAction(im,  MVT::i16, Legal);
      setIndexedLoadAction(im,  MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1,  Legal);
      setIndexedStoreAction(im, MVT::i8,  Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  } else {
    // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
    setIndexedLoadAction(ISD::POST_INC, MVT::i32,  Legal);
    setIndexedStoreAction(ISD::POST_INC, MVT::i32,  Legal);
  }

  // i64 operation support.
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  }
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
      || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
    setOperationAction(ISD::MULHS, MVT::i32, Expand);

  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
  }

  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
    setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  for (MVT VT : MVT::vector_valuetypes()) {
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
  }
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
  }

  // @llvm.readcyclecounter requires the Performance Monitors extension.
  // Default to the 0 expansion on unsupported platforms.
  // FIXME: Technically there are older ARM CPUs that have
  // implementation-specific ways of obtaining this information.
  if (Subtarget->hasPerfMon())
    setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
                                        : Subtarget->hasDivideInARMMode();
  if (!hasDivide) {
    // These are expanded into libcalls if the cpu doesn't have HW divider.
    setOperationAction(ISD::SDIV, MVT::i32, LibCall);
    setOperationAction(ISD::UDIV, MVT::i32, LibCall);
  }

  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);

    setOperationAction(ISD::SDIV, MVT::i64, Custom);
    setOperationAction(ISD::UDIV, MVT::i64, Custom);
  }

  // Register based DivRem for AEABI (RTABI 4.2)
  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
      Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
      Subtarget->isTargetWindows()) {
    setOperationAction(ISD::SREM, MVT::i32, Custom);
    setOperationAction(ISD::SREM, MVT::i64, Custom);
    HasStandaloneRem = false;

    if (Subtarget->isTargetWindows()) {
      const struct {
        const RTLIB::Libcall Op;
        const char * const Name;
        const CallingConv::ID CC;
      } LibraryCalls[] = {
        { RTLIB::SDIVREM_I8,  "__rt_sdiv",   CallingConv::ARM_AAPCS },
        { RTLIB::SDIVREM_I16, "__rt_sdiv",   CallingConv::ARM_AAPCS },
        { RTLIB::SDIVREM_I32, "__rt_sdiv",   CallingConv::ARM_AAPCS },
        { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },

        { RTLIB::UDIVREM_I8,  "__rt_udiv",   CallingConv::ARM_AAPCS },
        { RTLIB::UDIVREM_I16, "__rt_udiv",   CallingConv::ARM_AAPCS },
        { RTLIB::UDIVREM_I32, "__rt_udiv",   CallingConv::ARM_AAPCS },
        { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
      };

      for (const auto &LC : LibraryCalls) {
        setLibcallName(LC.Op, LC.Name);
        setLibcallCallingConv(LC.Op, LC.CC);
      }
    } else {
      const struct {
        const RTLIB::Libcall Op;
        const char * const Name;
        const CallingConv::ID CC;
      } LibraryCalls[] = {
        { RTLIB::SDIVREM_I8,  "__aeabi_idivmod",  CallingConv::ARM_AAPCS },
        { RTLIB::SDIVREM_I16, "__aeabi_idivmod",  CallingConv::ARM_AAPCS },
        { RTLIB::SDIVREM_I32, "__aeabi_idivmod",  CallingConv::ARM_AAPCS },
        { RTLIB::SDIVREM_I64, "__aeabi_ldivmod",  CallingConv::ARM_AAPCS },

        { RTLIB::UDIVREM_I8,  "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
        { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
        { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
        { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
      };

      for (const auto &LC : LibraryCalls) {
        setLibcallName(LC.Op, LC.Name);
        setLibcallCallingConv(LC.Op, LC.CC);
      }
    }

    setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
    setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
  } else {
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  }

  if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
    for (auto &VT : {MVT::f32, MVT::f64})
      setOperationAction(ISD::FPOWI, VT, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  if (Subtarget->isTargetWindows())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion.
  InsertFencesForAtomic = false;
  if (Subtarget->hasAnyDataBarrier() &&
      (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
    // ATOMIC_FENCE needs custom lowering; the others should have been expanded
    // to ldrex/strex loops already.
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
    if (!Subtarget->isThumb() || !Subtarget->isMClass())
      setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);

    // On v8, we have particularly efficient implementations of atomic fences
    // if they can be combined with nearby atomic loads and stores.
    if (!Subtarget->hasAcquireRelease() ||
        getTargetMachine().getOptLevel() == 0) {
      // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
      InsertFencesForAtomic = true;
    }
  } else {
    // If there's anything we can use as a barrier, go through custom lowering
    // for ATOMIC_FENCE.
    // If target has DMB in thumb, Fences can be inserted.
    if (Subtarget->hasDataBarrier())
      InsertFencesForAtomic = true;

    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
                       Subtarget->hasAnyDataBarrier() ? Custom : Expand);

    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
    // Unordered/Monotonic case.
    if (!InsertFencesForAtomic) {
      setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
      setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
    }
  }


  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
  setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
  if (Subtarget->useSjLjEH())
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");

  if (Subtarget->hasFullFP16()) {
    setOperationAction(ISD::SETCC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
  }

  if (Subtarget->hasFullFP16())
    setOperationAction(ISD::BR_CC, MVT::f16, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  if (!Subtarget->hasVFP4()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  }

  // Various VFP goodness
  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
    // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
    if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
      setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
      setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
    }

    // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
      setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
    }
  }

  // Use __sincos_stret if available.
  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
      getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
    setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
    setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
  }

  // FP-ARMv8 implements a lot of rounding-like FP operations.
  if (Subtarget->hasFPARMv8()) {
    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
    setOperationAction(ISD::FRINT, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);

    if (!Subtarget->isFPOnlySP()) {
      setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::f64, Legal);
      setOperationAction(ISD::FROUND, MVT::f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
      setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
      setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    }
  }

  if (Subtarget->hasNEON()) {
    // vmin and vmax aren't available in a scalar form, so we use
    // a NEON instruction with an undef lane instead.
    if (Subtarget->hasFullFP16()) {
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::XOR);

  if (Subtarget->hasV6Ops())
    setTargetDAGCombine(ISD::SRL);

  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
      !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  //// temporary - rewrite interface to use type
  MaxStoresPerMemset = 8;
  MaxStoresPerMemsetOptSize = 4;
  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
  MaxStoresPerMemcpyOptSize = 2;
  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
  MaxStoresPerMemmoveOptSize = 2;

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  // Prefer likely predicted branches to selects on out-of-order cores.
  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();

  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}

bool ARMTargetLowering::useSoftFloat() const {
  return Subtarget->useSoftFloat();
}

// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of its super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass *, uint8_t>
ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
                                           MVT VT) const {
  const TargetRegisterClass *RRC = nullptr;
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(TRI, VT);
  // Use DPR as representative register class for all floating point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = &ARM::DPRRegClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be constrained
    // to the VFP2 class (D0-D15). We currently model this constraint prior to
    // coalescing by double-counting the SP regs. See the FIXME above.
    if (Subtarget->useNEONForSinglePrecisionFP())
      Cost = 2;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = &ARM::DPRRegClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = &ARM::DPRRegClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = &ARM::DPRRegClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}
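
// Illustrative reading of the costs above (not in the original source): a
// cost of 2 for v4f32 says that one Q value occupies two consecutive D
// registers, so register-pressure tracking charges it as two DPR units; the
// useNEONForSinglePrecisionFP case doubles f32 for the D0-D15 constraint
// described in the comment.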

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((ARMISD::NodeType)Opcode) {
  case ARMISD::FIRST_NUMBER: break;
  case ARMISD::Wrapper: return "ARMISD::Wrapper";
  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
  case ARMISD::CALL: return "ARMISD::CALL";
  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
  case ARMISD::BRCOND: return "ARMISD::BRCOND";
  case ARMISD::BR_JT: return "ARMISD::BR_JT";
  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
  case ARMISD::CMP: return "ARMISD::CMP";
  case ARMISD::CMN: return "ARMISD::CMN";
  case ARMISD::CMPZ: return "ARMISD::CMPZ";
  case ARMISD::CMPFP: return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";

  case ARMISD::CMOV: return "ARMISD::CMOV";
  case ARMISD::SUBS: return "ARMISD::SUBS";

  case ARMISD::SSAT: return "ARMISD::SSAT";
  case ARMISD::USAT: return "ARMISD::USAT";

  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
  case ARMISD::RRX: return "ARMISD::RRX";

  case ARMISD::ADDC: return "ARMISD::ADDC";
  case ARMISD::ADDE: return "ARMISD::ADDE";
  case ARMISD::SUBC: return "ARMISD::SUBC";
  case ARMISD::SUBE: return "ARMISD::SUBE";

  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
  case ARMISD::VMOVhr: return "ARMISD::VMOVhr";
  case ARMISD::VMOVrh: return "ARMISD::VMOVrh";
  case ARMISD::VMOVSR: return "ARMISD::VMOVSR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";

  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";

  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";

  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
  case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";

  case ARMISD::VCEQ: return "ARMISD::VCEQ";
  case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
  case ARMISD::VCGE: return "ARMISD::VCGE";
  case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
  case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
  case ARMISD::VCGEU: return "ARMISD::VCGEU";
  case ARMISD::VCGT: return "ARMISD::VCGT";
  case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
  case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
  case ARMISD::VCGTU: return "ARMISD::VCGTU";
  case ARMISD::VTST: return "ARMISD::VTST";

  case ARMISD::VSHL: return "ARMISD::VSHL";
  case ARMISD::VSHRs: return "ARMISD::VSHRs";
  case ARMISD::VSHRu: return "ARMISD::VSHRu";
  case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
  case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
  case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
  case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
  case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
  case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
  case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
  case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
  case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
  case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
  case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
  case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
  case ARMISD::VSLI: return "ARMISD::VSLI";
  case ARMISD::VSRI: return "ARMISD::VSRI";
  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
  case ARMISD::VDUP: return "ARMISD::VDUP";
  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
  case ARMISD::VEXT: return "ARMISD::VEXT";
  case ARMISD::VREV64: return "ARMISD::VREV64";
  case ARMISD::VREV32: return "ARMISD::VREV32";
  case ARMISD::VREV16: return "ARMISD::VREV16";
  case ARMISD::VZIP: return "ARMISD::VZIP";
  case ARMISD::VUZP: return "ARMISD::VUZP";
  case ARMISD::VTRN: return "ARMISD::VTRN";
  case ARMISD::VTBL1: return "ARMISD::VTBL1";
  case ARMISD::VTBL2: return "ARMISD::VTBL2";
  case ARMISD::VMULLs: return "ARMISD::VMULLs";
  case ARMISD::VMULLu: return "ARMISD::VMULLu";
  case ARMISD::UMAAL: return "ARMISD::UMAAL";
  case ARMISD::UMLAL: return "ARMISD::UMLAL";
  case ARMISD::SMLAL: return "ARMISD::SMLAL";
  case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
  case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
  case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
  case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
  case ARMISD::SMULWB: return "ARMISD::SMULWB";
  case ARMISD::SMULWT: return "ARMISD::SMULWT";
  case ARMISD::SMLALD: return "ARMISD::SMLALD";
  case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
  case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
  case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
  case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
  case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
  case ARMISD::BFI: return "ARMISD::BFI";
  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
  case ARMISD::VBSL: return "ARMISD::VBSL";
  case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
  case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
  case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
  }
  return nullptr;
}

EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                          EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  return VT.changeVectorElementTypeToInteger();
}

/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive D registers.
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return &ARM::QQPRRegClass;
    if (VT == MVT::v8i64)
      return &ARM::QQQQPRRegClass;
  }
  return TargetLowering::getRegClassFor(VT);
}
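
// Why this override exists (illustrative): a NEON vld4/vst4 of consecutive D
// registers is modeled with a REG_SEQUENCE whose value type is v4i64 or
// v8i64, so those types need a register class (QQPR/QQQQPR) even though they
// are never made legal for ordinary arithmetic.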

// memcpy and other memory intrinsics typically try to use LDM/STM if the
// source/dest is aligned and the copy size is large enough. We therefore want
// to align such objects passed to memory intrinsics.
bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
                                               unsigned &PrefAlign) const {
  if (!isa<MemIntrinsic>(CI))
    return false;
  MinSize = 8;
  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
  // cycle faster than 4-byte aligned LDM.
  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
  return true;
}
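
// Worked example (illustrative, hypothetical values): for a call
//   memcpy(dst, src, 64);
// this hook reports MinSize = 8 and, on an ARM11-class core, PrefAlign = 8,
// so the caller may raise the alignment of the underlying objects and let
// the backend use 8-byte-aligned LDM/STM bursts for the copy.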

// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                  const TargetLibraryInfo *libInfo) const {
  return ARM::createFastISel(funcInfo, libInfo);
}

Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::ILP;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary
  // is not available.
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

  if (MCID.getNumDefs() == 0)
    return Sched::RegPressure;
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
    return Sched::ILP;

  return Sched::RegPressure;
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

static bool isSRL16(const SDValue &Op) {
  if (Op.getOpcode() != ISD::SRL)
    return false;
  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
    return Const->getZExtValue() == 16;
  return false;
}

static bool isSRA16(const SDValue &Op) {
  if (Op.getOpcode() != ISD::SRA)
    return false;
  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
    return Const->getZExtValue() == 16;
  return false;
}

static bool isSHL16(const SDValue &Op) {
  if (Op.getOpcode() != ISD::SHL)
    return false;
  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
    return Const->getZExtValue() == 16;
  return false;
}

// Check for a signed 16-bit value. We special case SRA because it makes it
// simpler when also looking for SRAs that aren't sign extending a
// smaller value. Without the check, we'd need to take extra care with
// checking order for some operations.
static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
  if (isSRA16(Op))
    return isSHL16(Op.getOperand(0));
  return DAG.ComputeNumSignBits(Op) == 17;
}
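
// Illustrative cases for isS16 on an i32 value (not from the original file):
//   (sra (shl x, 16), 16) -> true: the canonical sign-extension of a 16-bit
//                            value, accepted structurally via isSRA16/isSHL16.
//   anything else         -> true only if ComputeNumSignBits reports exactly
//                            17 sign bits, i.e. bits [31:15] are all copies
//                            of the sign bit, the signed 16-bit range.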

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE:  return ARMCC::NE;
  case ISD::SETEQ:  return ARMCC::EQ;
  case ISD::SETGT:  return ARMCC::GT;
  case ISD::SETGE:  return ARMCC::GE;
  case ISD::SETLT:  return ARMCC::LT;
  case ISD::SETLE:  return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
  CondCode2 = ARMCC::AL;
  InvalidOnQNaN = true;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ:
    CondCode = ARMCC::EQ;
    InvalidOnQNaN = false;
    break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE:
    CondCode = ARMCC::MI;
    CondCode2 = ARMCC::GT;
    InvalidOnQNaN = false;
    break;
  case ISD::SETO:  CondCode = ARMCC::VC; break;
  case ISD::SETUO: CondCode = ARMCC::VS; break;
  case ISD::SETUEQ:
    CondCode = ARMCC::EQ;
    CondCode2 = ARMCC::VS;
    InvalidOnQNaN = false;
    break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE:
    CondCode = ARMCC::NE;
    InvalidOnQNaN = false;
    break;
  }
}
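
// Example of the two-condition case above (illustrative): SETONE ("ordered
// and not equal") has no single ARM condition code, so it is returned as
// CondCode = MI with CondCode2 = GT; after a VCMP, MI covers the "less than"
// outcomes and GT the "greater than" ones, and together they exclude both
// equality and NaN operands.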

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

/// getEffectiveCallingConv - Get the effective calling convention, taking into
/// account presence of floating point hardware and calling convention
/// limitations, such as support for variadic functions.
CallingConv::ID
ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
                                           bool isVarArg) const {
  switch (CC) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::ARM_AAPCS:
  case CallingConv::ARM_APCS:
  case CallingConv::GHC:
    return CC;
  case CallingConv::PreserveMost:
    return CallingConv::PreserveMost;
  case CallingConv::ARM_AAPCS_VFP:
  case CallingConv::Swift:
    return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
  case CallingConv::C:
    if (!Subtarget->isAAPCS_ABI())
      return CallingConv::ARM_APCS;
    else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
             getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
             !isVarArg)
      return CallingConv::ARM_AAPCS_VFP;
    else
      return CallingConv::ARM_AAPCS;
  case CallingConv::Fast:
  case CallingConv::CXX_FAST_TLS:
    if (!Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
        return CallingConv::Fast;
      return CallingConv::ARM_APCS;
    } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
      return CallingConv::ARM_AAPCS_VFP;
    else
      return CallingConv::ARM_AAPCS;
  }
}
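
// Worked example (illustrative): on an AAPCS hard-float target with VFP2, a
// plain C call becomes ARM_AAPCS_VFP, so FP arguments travel in s0-s15/d0-d7;
// the same call made variadic falls back to ARM_AAPCS, because the VFP
// variant defines no rules for anonymous arguments.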

CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                 bool isVarArg) const {
  return CCAssignFnForNode(CC, false, isVarArg);
}

CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
                                                   bool isVarArg) const {
  return CCAssignFnForNode(CC, true, isVarArg);
}

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (getEffectiveCallingConv(CC, isVarArg)) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  case CallingConv::Fast:
    return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
  case CallingConv::GHC:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
  case CallingConv::PreserveMost:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue ARMTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
    SDValue ThisVal) const {
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    // Pass 'this' value directly from the argument to return value, to avoid
    // reg unit interference.
    if (i == 0 && isThisReturn) {
      assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
             "unexpected return calling convention register assignment");
      InVals.push_back(ThisVal);
      continue;
    }

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      if (!Subtarget->isLittle())
        std::swap (Lo, Hi);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, dl, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        if (!Subtarget->isLittle())
          std::swap (Lo, Hi);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, dl, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
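
// Shape of the needsCustom() path above (illustrative, little-endian case):
// an f64 returned in r0/r1 arrives as two glued i32 CopyFromReg nodes that
// are recombined with
//   (f64 (ARMISD::VMOVDRR lo, hi))
// and a v2f64 result repeats the sequence once per 64-bit half.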

/// LowerMemOpCallTo - Store the argument to the stack.
SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
                                            SDValue Arg, const SDLoc &dl,
                                            SelectionDAG &DAG,
                                            const CCValAssign &VA,
                                            ISD::ArgFlagsTy Flags) const {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
                       StackPtr, PtrOff);
  return DAG.getStore(
      Chain, dl, Arg, PtrOff,
      MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
}

void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVectorImpl<SDValue> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) const {
  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  unsigned id = Subtarget->isLittle() ? 0 : 1;
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
  else {
    assert(NextVA.isMemLoc());
    if (!StackPtr.getNode())
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
                                    getPointerTy(DAG.getDataLayout()));

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
                                           dl, DAG, NextVA,
                                           Flags));
  }
}
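
// The reverse of LowerCallResult's trick (illustrative): PassF64ArgInRegs
// cracks an f64 argument into two i32 halves with
//   lo, hi = (ARMISD::VMOVRRD f64)
// and each half either rides in a GPR via RegsToPass or, when the registers
// run out mid-value, is stored to the outgoing argument area.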
1790 
1791 /// LowerCall - Lowering a call into a callseq_start <-
1792 /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
1793 /// nodes.
1794 SDValue
1795 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1796  SmallVectorImpl<SDValue> &InVals) const {
1797  SelectionDAG &DAG = CLI.DAG;
1798  SDLoc &dl = CLI.DL;
1800  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1802  SDValue Chain = CLI.Chain;
1803  SDValue Callee = CLI.Callee;
1804  bool &isTailCall = CLI.IsTailCall;
1805  CallingConv::ID CallConv = CLI.CallConv;
1806  bool doesNotRet = CLI.DoesNotReturn;
1807  bool isVarArg = CLI.IsVarArg;
1808 
1809  MachineFunction &MF = DAG.getMachineFunction();
1810  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1811  bool isThisReturn = false;
1812  bool isSibCall = false;
1813  auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
1814 
1815  // Disable tail calls if they're not supported.
1816  if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1817  isTailCall = false;
1818 
1819  if (isTailCall) {
1820  // Check if it's really possible to do a tail call.
1821  isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1822  isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(),
1823  Outs, OutVals, Ins, DAG);
1824  if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
1825  report_fatal_error("failed to perform tail call elimination on a call "
1826  "site marked musttail");
1827  // We don't support GuaranteedTailCallOpt for ARM, only automatically
1828  // detected sibcalls.
1829  if (isTailCall) {
1830  ++NumTailCalls;
1831  isSibCall = true;
1832  }
1833  }
1834 
1835  // Analyze operands of the call, assigning locations to each operand.
1837  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1838  *DAG.getContext());
1839  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
1840 
1841  // Get a count of how many bytes are to be pushed on the stack.
1842  unsigned NumBytes = CCInfo.getNextStackOffset();
1843 
1844  // For tail calls, memory operands are available in our caller's stack.
1845  if (isSibCall)
1846  NumBytes = 0;
1847 
1848  // Adjust the stack pointer for the new arguments...
1849  // These operations are automatically eliminated by the prolog/epilog pass
1850  if (!isSibCall)
1851  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
1852 
1853  SDValue StackPtr =
1854  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
1855 
1856  RegsToPassVector RegsToPass;
1857  SmallVector<SDValue, 8> MemOpChains;
1858 
1859  // Walk the register/memloc assignments, inserting copies/loads. In the case
1860  // of tail call optimization, arguments are handled later.
1861  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1862  i != e;
1863  ++i, ++realArgIdx) {
1864  CCValAssign &VA = ArgLocs[i];
1865  SDValue Arg = OutVals[realArgIdx];
1866  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1867  bool isByVal = Flags.isByVal();
1868 
1869  // Promote the value if needed.
1870  switch (VA.getLocInfo()) {
1871  default: llvm_unreachable("Unknown loc info!");
1872  case CCValAssign::Full: break;
1873  case CCValAssign::SExt:
1874  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1875  break;
1876  case CCValAssign::ZExt:
1877  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1878  break;
1879  case CCValAssign::AExt:
1880  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1881  break;
1882  case CCValAssign::BCvt:
1883  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1884  break;
1885  }
1886 
1887  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1888  if (VA.needsCustom()) {
1889  if (VA.getLocVT() == MVT::v2f64) {
1890  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1891  DAG.getConstant(0, dl, MVT::i32));
1892  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1893  DAG.getConstant(1, dl, MVT::i32));
1894 
1895  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1896  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1897 
1898  VA = ArgLocs[++i]; // skip ahead to next loc
1899  if (VA.isRegLoc()) {
1900  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1901  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1902  } else {
1903  assert(VA.isMemLoc());
1904 
1905  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1906  dl, DAG, VA, Flags));
1907  }
1908  } else {
1909  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1910  StackPtr, MemOpChains, Flags);
1911  }
1912  } else if (VA.isRegLoc()) {
1913  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
1914  Outs[0].VT == MVT::i32) {
1915  assert(VA.getLocVT() == MVT::i32 &&
1916  "unexpected calling convention register assignment");
1917  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1918  "unexpected use of 'returned'");
1919  isThisReturn = true;
1920  }
1921  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1922  } else if (isByVal) {
1923  assert(VA.isMemLoc());
1924  unsigned offset = 0;
1925 
1926  // True if this byval aggregate will be split between registers
1927  // and memory.
1928  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1929  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1930 
1931  if (CurByValIdx < ByValArgsCount) {
1932 
1933  unsigned RegBegin, RegEnd;
1934  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1935 
1936  EVT PtrVT =
1937  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1938  unsigned int i, j;
1939  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1940  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
1941  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1942  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1943  MachinePointerInfo(),
1944  DAG.InferPtrAlignment(AddArg));
1945  MemOpChains.push_back(Load.getValue(1));
1946  RegsToPass.push_back(std::make_pair(j, Load));
1947  }
1948 
1949  // If the parameter size extends beyond the register area, the "offset"
1950  // value helps us calculate the stack slot for the remaining part properly.
1951  offset = RegEnd - RegBegin;
1952 
1953  CCInfo.nextInRegsParam();
1954  }
1955 
1956  if (Flags.getByValSize() > 4*offset) {
1957  auto PtrVT = getPointerTy(DAG.getDataLayout());
1958  unsigned LocMemOffset = VA.getLocMemOffset();
1959  SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1960  SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
1961  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
1962  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
1963  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
1964  MVT::i32);
1965  SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
1966  MVT::i32);
1967 
1968  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1969  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1970  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1971  Ops));
1972  }
1973  } else if (!isSibCall) {
1974  assert(VA.isMemLoc());
1975 
1976  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1977  dl, DAG, VA, Flags));
1978  }
1979  }
1980 
1981  if (!MemOpChains.empty())
1982  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1983 
1984  // Build a sequence of copy-to-reg nodes chained together with token chain
1985  // and flag operands which copy the outgoing args into the appropriate regs.
1986  SDValue InFlag;
1987  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1988  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1989  RegsToPass[i].second, InFlag);
1990  InFlag = Chain.getValue(1);
1991  }
1992 
1993  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1994  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1995  // node so that legalize doesn't hack it.
1996  bool isDirect = false;
1997 
1998  const TargetMachine &TM = getTargetMachine();
1999  const Module *Mod = MF.getFunction().getParent();
2000  const GlobalValue *GV = nullptr;
2001  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2002  GV = G->getGlobal();
2003  bool isStub =
2004  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2005 
2006  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2007  bool isLocalARMFunc = false;
2008  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2009  auto PtrVt = getPointerTy(DAG.getDataLayout());
2010 
2011  if (Subtarget->genLongCalls()) {
2012  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2013  "long-calls codegen is not position independent!");
2014  // Handle a global address or an external symbol. If it's not one of
2015  // those, the target's already in a register, so we don't need to do
2016  // anything extra.
2017  if (isa<GlobalAddressSDNode>(Callee)) {
2018  // Create a constant pool entry for the callee address
2019  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2020  ARMConstantPoolValue *CPV =
2021  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2022 
2023  // Get the address of the callee into a register
2024  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2025  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2026  Callee = DAG.getLoad(
2027  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2028  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2029  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2030  const char *Sym = S->getSymbol();
2031 
2032  // Create a constant pool entry for the callee address
2033  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2034  ARMConstantPoolValue *CPV =
2035  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2036  ARMPCLabelIndex, 0);
2037  // Get the address of the callee into a register
2038  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2039  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2040  Callee = DAG.getLoad(
2041  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2042  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2043  }
2044  } else if (isa<GlobalAddressSDNode>(Callee)) {
2045  // If we're optimizing for minimum size and the function is called three or
2046  // more times in this block, we can improve codesize by calling indirectly
2047  // as BLXr has a 16-bit encoding.
2048  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2049  auto *BB = CLI.CS.getParent();
2050  bool PreferIndirect =
2051  Subtarget->isThumb() && MF.getFunction().optForMinSize() &&
2052  count_if(GV->users(), [&BB](const User *U) {
2053  return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
2054  }) > 2;
2055 
2056  if (!PreferIndirect) {
2057  isDirect = true;
2058  bool isDef = GV->isStrongDefinitionForLinker();
2059 
2060  // ARM call to a local ARM function is predicable.
2061  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2062  // tBX takes a register source operand.
2063  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2064  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2065  Callee = DAG.getNode(
2066  ARMISD::WrapperPIC, dl, PtrVt,
2067  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2068  Callee = DAG.getLoad(
2069  PtrVt, dl, DAG.getEntryNode(), Callee,
2070  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2071  /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2072  MachineMemOperand::MOInvariant);
2073  } else if (Subtarget->isTargetCOFF()) {
2074  assert(Subtarget->isTargetWindows() &&
2075  "Windows is the only supported COFF target");
2076  unsigned TargetFlags = GV->hasDLLImportStorageClass()
2077  ? ARMII::MO_DLLIMPORT
2078  : ARMII::MO_NO_FLAG;
2079  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
2080  TargetFlags);
2081  if (GV->hasDLLImportStorageClass())
2082  Callee =
2083  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2084  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2085  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2086  } else {
2087  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2088  }
2089  }
2090  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2091  isDirect = true;
2092  // tBX takes a register source operand.
2093  const char *Sym = S->getSymbol();
2094  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2095  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2096  ARMConstantPoolValue *CPV =
2097  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2098  ARMPCLabelIndex, 4);
2099  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2100  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2101  Callee = DAG.getLoad(
2102  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2103  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2104  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2105  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2106  } else {
2107  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2108  }
2109  }
2110 
2111  // FIXME: handle tail calls differently.
2112  unsigned CallOpc;
2113  if (Subtarget->isThumb()) {
2114  if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2115  CallOpc = ARMISD::CALL_NOLINK;
2116  else
2117  CallOpc = ARMISD::CALL;
2118  } else {
2119  if (!isDirect && !Subtarget->hasV5TOps())
2120  CallOpc = ARMISD::CALL_NOLINK;
2121  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2122  // Emit regular call when code size is the priority
2123  !MF.getFunction().optForMinSize())
2124  // "mov lr, pc; b _foo" to avoid confusing the return stack predictor (RSP)
2125  CallOpc = ARMISD::CALL_NOLINK;
2126  else
2127  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2128  }
2129 
2130  std::vector<SDValue> Ops;
2131  Ops.push_back(Chain);
2132  Ops.push_back(Callee);
2133 
2134  // Add argument registers to the end of the list so that they are known live
2135  // into the call.
2136  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2137  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2138  RegsToPass[i].second.getValueType()));
2139 
2140  // Add a register mask operand representing the call-preserved registers.
2141  if (!isTailCall) {
2142  const uint32_t *Mask;
2143  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2144  if (isThisReturn) {
2145  // For 'this' returns, use the R0-preserving mask if applicable
2146  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2147  if (!Mask) {
2148  // Set isThisReturn to false if the calling convention is not one that
2149  // allows 'returned' to be modeled in this way, so LowerCallResult does
2150  // not try to pass 'this' straight through
2151  isThisReturn = false;
2152  Mask = ARI->getCallPreservedMask(MF, CallConv);
2153  }
2154  } else
2155  Mask = ARI->getCallPreservedMask(MF, CallConv);
2156 
2157  assert(Mask && "Missing call preserved mask for calling convention");
2158  Ops.push_back(DAG.getRegisterMask(Mask));
2159  }
2160 
2161  if (InFlag.getNode())
2162  Ops.push_back(InFlag);
2163 
2164  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2165  if (isTailCall) {
2166  MF.getFrameInfo().setHasTailCall();
2167  return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2168  }
2169 
2170  // Returns a chain and a flag for retval copy to use.
2171  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2172  InFlag = Chain.getValue(1);
2173 
2174  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2175  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2176  if (!Ins.empty())
2177  InFlag = Chain.getValue(1);
2178 
2179  // Handle result values, copying them out of physregs into vregs that we
2180  // return.
2181  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2182  InVals, isThisReturn,
2183  isThisReturn ? OutVals[0] : SDValue());
2184 }
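// Illustration (hypothetical IR, not part of this file): the isThisReturn
// path above matches calls whose first argument carries the 'returned'
// attribute, e.g.
//
//   %p = call i8* @setter(i8* returned %this, i32 %v)
//
// Since the callee promises to hand %this back in r0, LowerCallResult can
// reuse the value that was copied into r0 (OutVals[0]) instead of emitting a
// CopyFromReg of r0 after the call, provided the R0-preserving register mask
// exists for the calling convention.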
2185 
2186 /// HandleByVal - Every parameter *after* a byval parameter is passed
2187 /// on the stack. Remember the next parameter register to allocate,
2188 /// and then confiscate the rest of the parameter registers to ensure
2189 /// this.
2190 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2191  unsigned Align) const {
2192  // Byval (as with any stack) slots are always at least 4 byte aligned.
2193  Align = std::max(Align, 4U);
2194 
2195  unsigned Reg = State->AllocateReg(GPRArgRegs);
2196  if (!Reg)
2197  return;
2198 
2199  unsigned AlignInRegs = Align / 4;
2200  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2201  for (unsigned i = 0; i < Waste; ++i)
2202  Reg = State->AllocateReg(GPRArgRegs);
2203 
2204  if (!Reg)
2205  return;
2206 
2207  unsigned Excess = 4 * (ARM::R4 - Reg);
2208 
2209  // Special case when NSAA != SP and the parameter size is greater than the
2210  // size of all remaining GPR regs. In that case we can't split the parameter;
2211  // we must send it to the stack. We must also set the NCRN to R4, so all
2212  // remaining registers are wasted.
2213  const unsigned NSAAOffset = State->getNextStackOffset();
2214  if (NSAAOffset != 0 && Size > Excess) {
2215  while (State->AllocateReg(GPRArgRegs))
2216  ;
2217  return;
2218  }
2219 
2220  // The first register for the byval parameter is the first register that
2221  // wasn't allocated before this method call, so it would be "reg".
2222  // If the parameter is small enough to be saved in the range [reg, r4), then
2223  // the end (first-after-last) register would be reg + param-size-in-regs;
2224  // otherwise the parameter is split between registers and stack, and the
2225  // end register would be r4 in this case.
2226  unsigned ByValRegBegin = Reg;
2227  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2228  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2229  // Note, first register is allocated in the beginning of function already,
2230  // allocate remained amount of registers we need.
2231  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2232  State->AllocateReg(GPRArgRegs);
2233  // A byval parameter that is split between registers and memory needs its
2234  // size truncated here.
2235  // In the case where the entire structure fits in registers, we set the
2236  // size in memory to zero.
2237  Size = std::max<int>(Size - Excess, 0);
2238 }
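// A worked example of the accounting above (register numbers assumed for
// illustration, and assuming nothing is on the stack yet so NSAAOffset == 0):
// with r0 already taken, Align == 8 and Size == 20, AllocateReg returns r1;
// AlignInRegs == 2, so r1 is wasted and the byval starts at r2. Excess is
// then 4 * (r4 - r2) == 8 bytes and ByValRegEnd == min(r2 + 5, r4) == r4, so
// 8 bytes travel in r2-r3 while Size is truncated to 20 - 8 == 12 bytes that
// go on the stack.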
2239 
2240 /// MatchingStackOffset - Return true if the given stack call argument is
2241 /// already available in the same position (relatively) of the caller's
2242 /// incoming argument stack.
2243 static
2244 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2245  MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2246  const TargetInstrInfo *TII) {
2247  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2248  int FI = std::numeric_limits<int>::max();
2249  if (Arg.getOpcode() == ISD::CopyFromReg) {
2250  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2251  if (!TargetRegisterInfo::isVirtualRegister(VR))
2252  return false;
2253  MachineInstr *Def = MRI->getVRegDef(VR);
2254  if (!Def)
2255  return false;
2256  if (!Flags.isByVal()) {
2257  if (!TII->isLoadFromStackSlot(*Def, FI))
2258  return false;
2259  } else {
2260  return false;
2261  }
2262  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2263  if (Flags.isByVal())
2264  // ByVal argument is passed in as a pointer but it's now being
2265  // dereferenced. e.g.
2266  // define @foo(%struct.X* %A) {
2267  // tail call @bar(%struct.X* byval %A)
2268  // }
2269  return false;
2270  SDValue Ptr = Ld->getBasePtr();
2271  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2272  if (!FINode)
2273  return false;
2274  FI = FINode->getIndex();
2275  } else
2276  return false;
2277 
2278  assert(FI != std::numeric_limits<int>::max());
2279  if (!MFI.isFixedObjectIndex(FI))
2280  return false;
2281  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2282 }
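// In other words (a sketch of the intent, not an exhaustive spec): an
// outgoing argument "matches" only if it is a direct reload of one of the
// caller's own fixed incoming-argument slots, at exactly the same offset and
// size, so a tail call can leave that slot untouched.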
2283 
2284 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2285 /// for tail call optimization. Targets which want to do tail call
2286 /// optimization should implement this function.
2287 bool
2288 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2289  CallingConv::ID CalleeCC,
2290  bool isVarArg,
2291  bool isCalleeStructRet,
2292  bool isCallerStructRet,
2293  const SmallVectorImpl<ISD::OutputArg> &Outs,
2294  const SmallVectorImpl<SDValue> &OutVals,
2295  const SmallVectorImpl<ISD::InputArg> &Ins,
2296  SelectionDAG& DAG) const {
2297  MachineFunction &MF = DAG.getMachineFunction();
2298  const Function &CallerF = MF.getFunction();
2299  CallingConv::ID CallerCC = CallerF.getCallingConv();
2300 
2301  assert(Subtarget->supportsTailCall());
2302 
2303  // Tail calls to function pointers cannot be optimized for Thumb1 if the args
2304  // to the call take up r0-r3. The reason is that there are no legal registers
2305  // left to hold the pointer to the function to be called.
2306  if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2307  !isa<GlobalAddressSDNode>(Callee.getNode()))
2308  return false;
2309 
2310  // Look for obvious safe cases to perform tail call optimization that do not
2311  // require ABI changes. This is what gcc calls sibcall.
2312 
2313  // Exception-handling functions need a special set of instructions to indicate
2314  // a return to the hardware. Tail-calling another function would probably
2315  // break this.
2316  if (CallerF.hasFnAttribute("interrupt"))
2317  return false;
2318 
2319  // Also avoid sibcall optimization if either caller or callee uses struct
2320  // return semantics.
2321  if (isCalleeStructRet || isCallerStructRet)
2322  return false;
2323 
2324  // Externally-defined functions with weak linkage should not be
2325  // tail-called on ARM when the OS does not support dynamic
2326  // pre-emption of symbols, as the AAELF spec requires normal calls
2327  // to undefined weak functions to be replaced with a NOP or jump to the
2328  // next instruction. The behaviour of branch instructions in this
2329  // situation (as used for tail calls) is implementation-defined, so we
2330  // cannot rely on the linker replacing the tail call with a return.
2331  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2332  const GlobalValue *GV = G->getGlobal();
2333  const Triple &TT = getTargetMachine().getTargetTriple();
2334  if (GV->hasExternalWeakLinkage() &&
2335  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2336  return false;
2337  }
2338 
2339  // Check that the call results are passed in the same way.
2340  LLVMContext &C = *DAG.getContext();
2341  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2342  CCAssignFnForReturn(CalleeCC, isVarArg),
2343  CCAssignFnForReturn(CallerCC, isVarArg)))
2344  return false;
2345  // The callee has to preserve all registers the caller needs to preserve.
2346  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2347  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2348  if (CalleeCC != CallerCC) {
2349  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2350  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2351  return false;
2352  }
2353 
2354  // If Caller's vararg or byval argument has been split between registers and
2355  // stack, do not perform tail call, since part of the argument is in caller's
2356  // local frame.
2357  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2358  if (AFI_Caller->getArgRegsSaveSize())
2359  return false;
2360 
2361  // If the callee takes no arguments then go on to check the results of the
2362  // call.
2363  if (!Outs.empty()) {
2364  // Check if stack adjustment is needed. For now, do not do this if any
2365  // argument is passed on the stack.
2366  SmallVector<CCValAssign, 16> ArgLocs;
2367  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2368  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2369  if (CCInfo.getNextStackOffset()) {
2370  // Check if the arguments are already laid out in the right way as
2371  // the caller's fixed stack objects.
2372  MachineFrameInfo &MFI = MF.getFrameInfo();
2373  const MachineRegisterInfo *MRI = &MF.getRegInfo();
2374  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2375  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2376  i != e;
2377  ++i, ++realArgIdx) {
2378  CCValAssign &VA = ArgLocs[i];
2379  EVT RegVT = VA.getLocVT();
2380  SDValue Arg = OutVals[realArgIdx];
2381  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2382  if (VA.getLocInfo() == CCValAssign::Indirect)
2383  return false;
2384  if (VA.needsCustom()) {
2385  // f64 and vector types are split into multiple registers or
2386  // register/stack-slot combinations. The types will not match
2387  // the registers; give up on memory f64 refs until we figure
2388  // out what to do about this.
2389  if (!VA.isRegLoc())
2390  return false;
2391  if (!ArgLocs[++i].isRegLoc())
2392  return false;
2393  if (RegVT == MVT::v2f64) {
2394  if (!ArgLocs[++i].isRegLoc())
2395  return false;
2396  if (!ArgLocs[++i].isRegLoc())
2397  return false;
2398  }
2399  } else if (!VA.isRegLoc()) {
2400  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2401  MFI, MRI, TII))
2402  return false;
2403  }
2404  }
2405  }
2406 
2407  const MachineRegisterInfo &MRI = MF.getRegInfo();
2408  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2409  return false;
2410  }
2411 
2412  return true;
2413 }
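// A sketch of a call that passes all the checks above (hypothetical IR):
// same calling convention on both sides, no sret, no interrupt attribute,
// and every argument in registers, so the call can become a plain branch:
//
//   define i32 @caller(i32 %x) {
//   entry:
//     %r = tail call i32 @callee(i32 %x)
//     ret i32 %r
//   }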
2414 
2415 bool
2416 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2417  MachineFunction &MF, bool isVarArg,
2418  const SmallVectorImpl<ISD::OutputArg> &Outs,
2419  LLVMContext &Context) const {
2420  SmallVector<CCValAssign, 16> RVLocs;
2421  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2422  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2423 }
2424 
2425 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2426  const SDLoc &DL, SelectionDAG &DAG) {
2427  const MachineFunction &MF = DAG.getMachineFunction();
2428  const Function &F = MF.getFunction();
2429 
2430  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2431 
2432  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2433  // version of the "preferred return address". These offsets affect the return
2434  // instruction if this is a return from PL1 without hypervisor extensions.
2435  // IRQ/FIQ: +4 "subs pc, lr, #4"
2436  // SWI: 0 "subs pc, lr, #0"
2437  // ABORT: +4 "subs pc, lr, #4"
2438  // UNDEF: +4/+2 "subs pc, lr, #0"
2439  // UNDEF varies depending on whether the exception came from ARM or Thumb
2440  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2441 
2442  int64_t LROffset;
2443  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2444  IntKind == "ABORT")
2445  LROffset = 4;
2446  else if (IntKind == "SWI" || IntKind == "UNDEF")
2447  LROffset = 0;
2448  else
2449  report_fatal_error("Unsupported interrupt attribute. If present, value "
2450  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2451 
2452  RetOps.insert(RetOps.begin() + 1,
2453  DAG.getConstant(LROffset, DL, MVT::i32, false));
2454 
2455  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2456 }
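// For example, a handler declared as (hypothetical source, GCC-style
// attribute)
//
//   __attribute__((interrupt("IRQ"))) void isr(void);
//
// carries "interrupt"="IRQ" on the IR function, so LROffset is 4 and the
// epilogue selected from ARMISD::INTRET_FLAG is "subs pc, lr, #4".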
2457 
2458 SDValue
2459 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2460  bool isVarArg,
2461  const SmallVectorImpl<ISD::OutputArg> &Outs,
2462  const SmallVectorImpl<SDValue> &OutVals,
2463  const SDLoc &dl, SelectionDAG &DAG) const {
2464  // CCValAssign - represent the assignment of the return value to a location.
2465  SmallVector<CCValAssign, 16> RVLocs;
2466 
2467  // CCState - Info about the registers and stack slots.
2468  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2469  *DAG.getContext());
2470 
2471  // Analyze outgoing return values.
2472  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2473 
2474  SDValue Flag;
2475  SmallVector<SDValue, 4> RetOps;
2476  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2477  bool isLittleEndian = Subtarget->isLittle();
2478 
2479  MachineFunction &MF = DAG.getMachineFunction();
2480  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2481  AFI->setReturnRegsCount(RVLocs.size());
2482 
2483  // Copy the result values into the output registers.
2484  for (unsigned i = 0, realRVLocIdx = 0;
2485  i != RVLocs.size();
2486  ++i, ++realRVLocIdx) {
2487  CCValAssign &VA = RVLocs[i];
2488  assert(VA.isRegLoc() && "Can only return in registers!");
2489 
2490  SDValue Arg = OutVals[realRVLocIdx];
2491  bool ReturnF16 = false;
2492 
2493  if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
2494  // Half-precision return values can be returned like this:
2495  //
2496  // t11: f16 = fadd ...
2497  // t12: i16 = bitcast t11
2498  // t13: i32 = zero_extend t12
2499  // t14: f32 = bitcast t13 <~~~~~~~ Arg
2500  //
2501  // to avoid code generation for bitcasts, we simply set Arg to the node
2502  // that produces the f16 value, t11 in this case.
2503  //
2504  if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
2505  SDValue ZE = Arg.getOperand(0);
2506  if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
2507  SDValue BC = ZE.getOperand(0);
2508  if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
2509  Arg = BC.getOperand(0);
2510  ReturnF16 = true;
2511  }
2512  }
2513  }
2514  }
2515 
2516  switch (VA.getLocInfo()) {
2517  default: llvm_unreachable("Unknown loc info!");
2518  case CCValAssign::Full: break;
2519  case CCValAssign::BCvt:
2520  if (!ReturnF16)
2521  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2522  break;
2523  }
2524 
2525  if (VA.needsCustom()) {
2526  if (VA.getLocVT() == MVT::v2f64) {
2527  // Extract the first half and return it in two registers.
2528  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2529  DAG.getConstant(0, dl, MVT::i32));
2530  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2531  DAG.getVTList(MVT::i32, MVT::i32), Half);
2532 
2533  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2534  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2535  Flag);
2536  Flag = Chain.getValue(1);
2537  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2538  VA = RVLocs[++i]; // skip ahead to next loc
2539  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2540  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2541  Flag);
2542  Flag = Chain.getValue(1);
2543  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2544  VA = RVLocs[++i]; // skip ahead to next loc
2545 
2546  // Extract the 2nd half and fall through to handle it as an f64 value.
2547  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2548  DAG.getConstant(1, dl, MVT::i32));
2549  }
2550  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2551  // available.
2552  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2553  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2554  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2555  fmrrd.getValue(isLittleEndian ? 0 : 1),
2556  Flag);
2557  Flag = Chain.getValue(1);
2558  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2559  VA = RVLocs[++i]; // skip ahead to next loc
2560  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2561  fmrrd.getValue(isLittleEndian ? 1 : 0),
2562  Flag);
2563  } else
2564  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2565 
2566  // Guarantee that all emitted copies are stuck together (glued), so
2567  // nothing can be scheduled in between them.
2568  Flag = Chain.getValue(1);
2569  RetOps.push_back(DAG.getRegister(VA.getLocReg(),
2570  ReturnF16 ? MVT::f16 : VA.getLocVT()));
2571  }
2572  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2573  const MCPhysReg *I =
2574  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2575  if (I) {
2576  for (; *I; ++I) {
2577  if (ARM::GPRRegClass.contains(*I))
2578  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2579  else if (ARM::DPRRegClass.contains(*I))
2580  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2581  else
2582  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2583  }
2584  }
2585 
2586  // Update chain and glue.
2587  RetOps[0] = Chain;
2588  if (Flag.getNode())
2589  RetOps.push_back(Flag);
2590 
2591  // CPUs which aren't M-class use a special sequence to return from
2592  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2593  // though we use "subs pc, lr, #N").
2594  //
2595  // M-class CPUs actually use a normal return sequence with a special
2596  // (hardware-provided) value in LR, so the normal code path works.
2597  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
2598  !Subtarget->isMClass()) {
2599  if (Subtarget->isThumb1Only())
2600  report_fatal_error("interrupt attribute is not supported in Thumb1");
2601  return LowerInterruptReturn(RetOps, dl, DAG);
2602  }
2603 
2604  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2605 }
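// To illustrate the needsCustom() path above: with f64 returned in GPRs
// (assuming a soft-float calling convention), ARMISD::VMOVRRD tears the
// value into two i32 halves, which become glued CopyToReg nodes into R0 and
// R1 (low half first when little-endian); both registers are appended to
// RetOps so the RET_FLAG node keeps them live until the return.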
2606 
2607 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2608  if (N->getNumValues() != 1)
2609  return false;
2610  if (!N->hasNUsesOfValue(1, 0))
2611  return false;
2612 
2613  SDValue TCChain = Chain;
2614  SDNode *Copy = *N->use_begin();
2615  if (Copy->getOpcode() == ISD::CopyToReg) {
2616  // If the copy has a glue operand, we conservatively assume it isn't safe to
2617  // perform a tail call.
2618  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2619  return false;
2620  TCChain = Copy->getOperand(0);
2621  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2622  SDNode *VMov = Copy;
2623  // f64 returned in a pair of GPRs.
2624  SmallPtrSet<SDNode*, 2> Copies;
2625  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2626  UI != UE; ++UI) {
2627  if (UI->getOpcode() != ISD::CopyToReg)
2628  return false;
2629  Copies.insert(*UI);
2630  }
2631  if (Copies.size() > 2)
2632  return false;
2633 
2634  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2635  UI != UE; ++UI) {
2636  SDValue UseChain = UI->getOperand(0);
2637  if (Copies.count(UseChain.getNode()))
2638  // Second CopyToReg
2639  Copy = *UI;
2640  else {
2641  // We are at the top of this chain.
2642  // If the copy has a glue operand, we conservatively assume it
2643  // isn't safe to perform a tail call.
2644  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2645  return false;
2646  // First CopyToReg
2647  TCChain = UseChain;
2648  }
2649  }
2650  } else if (Copy->getOpcode() == ISD::BITCAST) {
2651  // f32 returned in a single GPR.
2652  if (!Copy->hasOneUse())
2653  return false;
2654  Copy = *Copy->use_begin();
2655  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2656  return false;
2657  // If the copy has a glue operand, we conservatively assume it isn't safe to
2658  // perform a tail call.
2659  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2660  return false;
2661  TCChain = Copy->getOperand(0);
2662  } else {
2663  return false;
2664  }
2665 
2666  bool HasRet = false;
2667  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2668  UI != UE; ++UI) {
2669  if (UI->getOpcode() != ARMISD::RET_FLAG &&
2670  UI->getOpcode() != ARMISD::INTRET_FLAG)
2671  return false;
2672  HasRet = true;
2673  }
2674 
2675  if (!HasRet)
2676  return false;
2677 
2678  Chain = TCChain;
2679  return true;
2680 }
2681 
2682 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2683  if (!Subtarget->supportsTailCall())
2684  return false;
2685 
2686  auto Attr =
2687  CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2688  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2689  return false;
2690 
2691  return true;
2692 }
2693 
2694 // Trying to write a 64-bit value, so we need to split it into two 32-bit
2695 // values first, and then pass the low and high parts through.
2696 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2697  SDLoc DL(Op);
2698  SDValue WriteValue = Op->getOperand(2);
2699 
2700  // This function is only supposed to be called for i64 type argument.
2701  assert(WriteValue.getValueType() == MVT::i64
2702  && "LowerWRITE_REGISTER called for non-i64 type argument.");
2703 
2704  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2705  DAG.getConstant(0, DL, MVT::i32));
2706  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2707  DAG.getConstant(1, DL, MVT::i32));
2708  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2709  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2710 }
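// For example (hypothetical IR), a 64-bit register write such as
//
//   call void @llvm.write_register.i64(metadata !0, i64 %v)
//
// is rebuilt here as one ISD::WRITE_REGISTER node taking the two
// EXTRACT_ELEMENT halves (lo, hi), since ARM has no 64-bit GPR move.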
2711 
2712 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2713 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2714 // one of the above mentioned nodes. It has to be wrapped because otherwise
2715 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2716 // be used to form addressing mode. These wrapped nodes will be selected
2717 // into MOVi.
2718 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2719  SelectionDAG &DAG) const {
2720  EVT PtrVT = Op.getValueType();
2721  // FIXME there is no actual debug info here
2722  SDLoc dl(Op);
2723  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2724  SDValue Res;
2725 
2726  // When generating execute-only code, constant pools must be promoted to the
2727  // global data section. It's a bit ugly that we can't share them across basic
2728  // blocks, but this way we guarantee that execute-only behaves correctly with
2729  // position-independent addressing modes.
2730  if (Subtarget->genExecuteOnly()) {
2731  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2732  auto T = const_cast<Type*>(CP->getType());
2733  auto C = const_cast<Constant*>(CP->getConstVal());
2734  auto M = const_cast<Module*>(DAG.getMachineFunction().
2735  getFunction().getParent());
2736  auto GV = new GlobalVariable(
2737  *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
2738  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
2739  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
2740  Twine(AFI->createPICLabelUId())
2741  );
2742  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
2743  dl, PtrVT);
2744  return LowerGlobalAddress(GA, DAG);
2745  }
2746 
2747  if (CP->isMachineConstantPoolEntry())
2748  Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2749  CP->getAlignment());
2750  else
2751  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2752  CP->getAlignment());
2753  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2754 }
2755 
2756 unsigned ARMTargetLowering::getJumpTableEncoding() const {
2757  return MachineJumpTableInfo::EK_Inline;
2758 }
2759 
2760 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2761  SelectionDAG &DAG) const {
2762  MachineFunction &MF = DAG.getMachineFunction();
2763  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2764  unsigned ARMPCLabelIndex = 0;
2765  SDLoc DL(Op);
2766  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2767  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2768  SDValue CPAddr;
2769  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2770  if (!IsPositionIndependent) {
2771  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2772  } else {
2773  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2774  ARMPCLabelIndex = AFI->createPICLabelUId();
2775  ARMConstantPoolValue *CPV =
2776  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2777  ARMCP::CPBlockAddress, PCAdj);
2778  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2779  }
2780  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2781  SDValue Result = DAG.getLoad(
2782  PtrVT, DL, DAG.getEntryNode(), CPAddr,
2783  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2784  if (!IsPositionIndependent)
2785  return Result;
2786  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2787  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2788 }
2789 
2790 /// Convert a TLS address reference into the correct sequence of loads
2791 /// and calls to compute the variable's address for Darwin, and return an
2792 /// SDValue containing the final node.
2793 
2794 /// Darwin only has one TLS scheme which must be capable of dealing with the
2795 /// fully general situation, in the worst case. This means:
2796 /// + "extern __thread" declaration.
2797 /// + Defined in a possibly unknown dynamic library.
2798 ///
2799 /// The general system is that each __thread variable has a [3 x i32] descriptor
2800 /// which contains information used by the runtime to calculate the address. The
2801 /// only part of this the compiler needs to know about is the first word, which
2802 /// contains a function pointer that must be called with the address of the
2803 /// entire descriptor in "r0".
2804 ///
2805 /// Since this descriptor may be in a different unit, in general access must
2806 /// proceed along the usual ARM rules. A common sequence to produce is:
2807 ///
2808 /// movw rT1, :lower16:_var$non_lazy_ptr
2809 /// movt rT1, :upper16:_var$non_lazy_ptr
2810 /// ldr r0, [rT1]
2811 /// ldr rT2, [r0]
2812 /// blx rT2
2813 /// [...address now in r0...]
2814 SDValue
2815 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2816  SelectionDAG &DAG) const {
2817  assert(Subtarget->isTargetDarwin() &&
2818  "This function expects a Darwin target");
2819  SDLoc DL(Op);
2820 
2821  // The first step is to get the address of the actual global symbol. This is
2822  // where the TLS descriptor lives.
2823  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
2824 
2825  // The first entry in the descriptor is a function pointer that we must call
2826  // to obtain the address of the variable.
2827  SDValue Chain = DAG.getEntryNode();
2828  SDValue FuncTLVGet = DAG.getLoad(
2829  MVT::i32, DL, Chain, DescAddr,
2830  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2831  /* Alignment = */ 4,
2832  MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
2833  MachineMemOperand::MOInvariant);
2834  Chain = FuncTLVGet.getValue(1);
2835 
2836  MachineFunction &F = DAG.getMachineFunction();
2837  MachineFrameInfo &MFI = F.getFrameInfo();
2838  MFI.setAdjustsStack(true);
2839 
2840  // TLS calls preserve all registers except those that absolutely must be
2841  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
2842  // silly).
2843  auto TRI =
2844  getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
2845  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
2846  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
2847 
2848  // Finally, we can make the call. This is just a degenerate version of a
2849  // normal ARM call node: r0 takes the address of the descriptor, and
2850  // returns the address of the variable in this thread.
2851  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
2852  Chain =
2853  DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
2854  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
2855  DAG.getRegisterMask(Mask), Chain.getValue(1));
2856  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
2857 }
2858 
2859 SDValue
2860 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
2861  SelectionDAG &DAG) const {
2862  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
2863 
2864  SDValue Chain = DAG.getEntryNode();
2865  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2866  SDLoc DL(Op);
2867 
2868  // Load the current TEB (thread environment block)
2869  SDValue Ops[] = {Chain,
2870  DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
2871  DAG.getConstant(15, DL, MVT::i32),
2872  DAG.getConstant(0, DL, MVT::i32),
2873  DAG.getConstant(13, DL, MVT::i32),
2874  DAG.getConstant(0, DL, MVT::i32),
2875  DAG.getConstant(2, DL, MVT::i32)};
2876  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
2877  DAG.getVTList(MVT::i32, MVT::Other), Ops);
2878 
2879  SDValue TEB = CurrentTEB.getValue(0);
2880  Chain = CurrentTEB.getValue(1);
2881 
2882  // Load the ThreadLocalStoragePointer from the TEB
2883  // A pointer to the TLS array is located at offset 0x2c from the TEB.
2884  SDValue TLSArray =
2885  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
2886  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
2887 
2888  // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
2889  // offset into the TLSArray.
2890 
2891  // Load the TLS index from the C runtime
2892  SDValue TLSIndex =
2893  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
2894  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
2895  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
2896 
2897  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
2898  DAG.getConstant(2, DL, MVT::i32));
2899  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
2900  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
2901  MachinePointerInfo());
2902 
2903  // Get the offset of the start of the .tls section (section base)
2904  const auto *GA = cast<GlobalAddressSDNode>(Op);
2905  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
2906  SDValue Offset = DAG.getLoad(
2907  PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
2908  DAG.getTargetConstantPool(CPV, PtrVT, 4)),
2909  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2910 
2911  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
2912 }
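// Putting the loads above together, the address is computed roughly as
// (pseudo-code; 0x2c is the TLS-array slot in the TEB as noted above):
//
//   TEB      = mrc p15, 0, <reg>, c13, c0, 2
//   TLSArray = *(TEB + 0x2c)
//   TLSBase  = *(TLSArray + _tls_index * 4)
//   addr     = TLSBase + SECREL(global)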
2913 
2914 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
2915 SDValue
2916 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2917  SelectionDAG &DAG) const {
2918  SDLoc dl(GA);
2919  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2920  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2921  MachineFunction &MF = DAG.getMachineFunction();
2922  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2923  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2924  ARMConstantPoolValue *CPV =
2925  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2926  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2927  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2928  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2929  Argument = DAG.getLoad(
2930  PtrVT, dl, DAG.getEntryNode(), Argument,
2931  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2932  SDValue Chain = Argument.getValue(1);
2933 
2934  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2935  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2936 
2937  // call __tls_get_addr.
2938  ArgListTy Args;
2939  ArgListEntry Entry;
2940  Entry.Node = Argument;
2941  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2942  Args.push_back(Entry);
2943 
2944  // FIXME: is there useful debug info available here?
2945  TargetLowering::CallLoweringInfo CLI(DAG);
2946  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2947  CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2948  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
2949 
2950  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2951  return CallResult.first;
2952 }
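// The net effect is the standard general-dynamic sequence (sketch): a
// PC-relative load of the TLSGD constant-pool entry, a PIC_ADD to form the
// descriptor address, then a call to __tls_get_addr whose i32 result is the
// variable's address.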
2953 
2954 // Lower ISD::GlobalTLSAddress using the "initial exec" or
2955 // "local exec" model.
2956 SDValue
2957 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2958  SelectionDAG &DAG,
2959  TLSModel::Model model) const {
2960  const GlobalValue *GV = GA->getGlobal();
2961  SDLoc dl(GA);
2962  SDValue Offset;
2963  SDValue Chain = DAG.getEntryNode();
2964  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2965  // Get the Thread Pointer
2966  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2967 
2968  if (model == TLSModel::InitialExec) {
2969  MachineFunction &MF = DAG.getMachineFunction();
2970  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2971  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2972  // Initial exec model.
2973  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2974  ARMConstantPoolValue *CPV =
2975  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2976  ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2977  true);
2978  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2979  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2980  Offset = DAG.getLoad(
2981  PtrVT, dl, Chain, Offset,
2982  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2983  Chain = Offset.getValue(1);
2984 
2985  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2986  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2987 
2988  Offset = DAG.getLoad(
2989  PtrVT, dl, Chain, Offset,
2990  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2991  } else {
2992  // local exec model
2993  assert(model == TLSModel::LocalExec);
2994  ARMConstantPoolValue *CPV =
2995  ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
2996  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2997  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2998  Offset = DAG.getLoad(
2999  PtrVT, dl, Chain, Offset,
3000  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3001  }
3002 
3003  // The address of the thread local variable is the add of the thread
3004  // pointer with the offset of the variable.
3005  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3006 }
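// In both models the final address is simply thread-pointer + offset; for
// initial-exec the offset is itself loaded through a GOTTPOFF entry (the
// extra load above), while for local-exec it is the link-time TPOFF constant
// read straight from the constant pool.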
3007 
3008 SDValue
3009 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3010  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3011  if (DAG.getTarget().useEmulatedTLS())
3012  return LowerToTLSEmulatedModel(GA, DAG);
3013 
3014  if (Subtarget->isTargetDarwin())
3015  return LowerGlobalTLSAddressDarwin(Op, DAG);
3016 
3017  if (Subtarget->isTargetWindows())
3018  return LowerGlobalTLSAddressWindows(Op, DAG);
3019 
3020  // TODO: implement the "local dynamic" model
3021  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3022  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3023 
3024  switch (model) {
3025  case TLSModel::GeneralDynamic:
3026  case TLSModel::LocalDynamic:
3027  return LowerToTLSGeneralDynamicModel(GA, DAG);
3028  case TLSModel::InitialExec:
3029  case TLSModel::LocalExec:
3030  return LowerToTLSExecModels(GA, DAG, model);
3031  }
3032  llvm_unreachable("bogus TLS model");
3033 }
3034 
3035 /// Return true if all users of V are within function F, looking through
3036 /// ConstantExprs.
3037 static bool allUsersAreInFunction(const Value *V, const Function *F) {
3038  SmallVector<const User*,4> Worklist;
3039  for (auto *U : V->users())
3040  Worklist.push_back(U);
3041  while (!Worklist.empty()) {
3042  auto *U = Worklist.pop_back_val();
3043  if (isa<ConstantExpr>(U)) {
3044  for (auto *UU : U->users())
3045  Worklist.push_back(UU);
3046  continue;
3047  }
3048 
3049  auto *I = dyn_cast<Instruction>(U);
3050  if (!I || I->getParent()->getParent() != F)
3051  return false;
3052  }
3053  return true;
3054 }
3055 
3056 static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3057  const GlobalValue *GV, SelectionDAG &DAG,
3058  EVT PtrVT, const SDLoc &dl) {
3059  // If we're creating a pool entry for a constant global with unnamed address,
3060  // and the global is small enough, we can emit it inline into the constant pool
3061  // to save ourselves an indirection.
3062  //
3063  // This is a win if the constant is only used in one function (so it doesn't
3064  // need to be duplicated) or duplicating the constant wouldn't increase code
3065  // size (implying the constant is no larger than 4 bytes).
3066  const Function &F = DAG.getMachineFunction().getFunction();
3067 
3068  // We rely on this decision to inline being idempotent and unrelated to the
3069  // use-site. We know that if we inline a variable at one use site, we'll
3070  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3071  // doesn't know about this optimization, so bail out if it's enabled, else
3072  // we could decide to inline here (and thus never emit the GV) but require
3073  // the GV from fast-isel generated code.
3074  if (!EnableConstpoolPromotion ||
3075  DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3076  return SDValue();
3077 
3078  auto *GVar = dyn_cast<GlobalVariable>(GV);
3079  if (!GVar || !GVar->hasInitializer() ||
3080  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3081  !GVar->hasLocalLinkage())
3082  return SDValue();
3083 
3084  // If we inline a value that contains relocations, we move the relocations
3085  // from .data to .text. This is not allowed in position-independent code.
3086  auto *Init = GVar->getInitializer();
3087  if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3088  Init->needsRelocation())
3089  return SDValue();
3090 
3091  // The constant islands pass can only really deal with alignment requests
3092  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3093  // any type wanting greater alignment requirements than 4 bytes. We also
3094  // can only promote constants that are multiples of 4 bytes in size or
3095  // are paddable to a multiple of 4. Currently we only try to pad constants
3096  // that are strings, for simplicity.
3097  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3098  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3099  unsigned Align = DAG.getDataLayout().getPreferredAlignment(GVar);
3100  unsigned RequiredPadding = 4 - (Size % 4);
3101  bool PaddingPossible =
3102  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3103  if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3104  Size == 0)
3105  return SDValue();
3106 
3107  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3108  MachineFunction &MF = DAG.getMachineFunction();
3109  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3110 
3111  // We can't bloat the constant pool too much, else the ConstantIslands pass
3112  // may fail to converge. If we haven't promoted this global yet (it may have
3113  // multiple uses), and promoting it would increase the constant pool size (Sz
3114  // > 4), ensure we have space to do so up to MaxTotal.
3115  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3116  if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3117  ConstpoolPromotionMaxTotal)
3118  return SDValue();
3119 
3120  // This is only valid if all users are in a single function; we can't clone
3121  // the constant in general. The LLVM IR unnamed_addr allows merging
3122  // constants, but not cloning them.
3123  //
3124  // We could potentially allow cloning if we could prove all uses of the
3125  // constant in the current function don't care about the address, like
3126  // printf format strings. But that isn't implemented for now.
3127  if (!allUsersAreInFunction(GVar, &F))
3128  return SDValue();
3129 
3130  // We're going to inline this global. Pad it out if needed.
3131  if (RequiredPadding != 4) {
3132  StringRef S = CDAInit->getAsString();
3133 
3134  SmallVector<uint8_t,16> V(S.size());
3135  std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3136  while (RequiredPadding--)
3137  V.push_back(0);
3138  Init = ConstantDataArray::get(*DAG.getContext(), V);
3139  }
3140 
3141  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3142  SDValue CPAddr =
3143  DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3144  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3145  AFI->markGlobalAsPromotedToConstantPool(GVar);
3146  AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3147  PaddedSize - 4);
3148  }
3149  ++NumConstpoolPromoted;
3150  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3151 }
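// A worked padding example (size assumed for illustration): a 6-byte string
// initializer gives RequiredPadding == 4 - (6 % 4) == 2, so two NUL bytes
// are appended and PaddedSize == 8, a legal multiple of 4 for the constant
// islands pass.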
3152 
3153 static bool isReadOnly(const GlobalValue *GV) {
3154  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3155  if (!(GV = GA->getBaseObject()))
3156  return false;
3157  if (const auto *V = dyn_cast<GlobalVariable>(GV))
3158  return V->isConstant();
3159  return isa<Function>(GV);
3160 }
3161 
3162 SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3163  SelectionDAG &DAG) const {
3164  switch (Subtarget->getTargetTriple().getObjectFormat()) {
3165  default: llvm_unreachable("unknown object format");
3166  case Triple::COFF:
3167  return LowerGlobalAddressWindows(Op, DAG);
3168  case Triple::ELF:
3169  return LowerGlobalAddressELF(Op, DAG);
3170  case Triple::MachO:
3171  return LowerGlobalAddressDarwin(Op, DAG);
3172  }
3173 }
3174 
3175 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3176  SelectionDAG &DAG) const {
3177  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3178  SDLoc dl(Op);
3179  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3180  const TargetMachine &TM = getTargetMachine();
3181  bool IsRO = isReadOnly(GV);
3182 
3183  // promoteToConstantPool only if not generating XO text section
3184  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3185  if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3186  return V;
3187 
3188  if (isPositionIndependent()) {
3189  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3190  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3191  UseGOT_PREL ? ARMII::MO_GOT : 0);
3192  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3193  if (UseGOT_PREL)
3194  Result =
3195  DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3196  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3197  return Result;
3198  } else if (Subtarget->isROPI() && IsRO) {
3199  // PC-relative.
3200  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3201  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3202  return Result;
3203  } else if (Subtarget->isRWPI() && !IsRO) {
3204  // SB-relative.
3205  SDValue RelAddr;
3206  if (Subtarget->useMovt(DAG.getMachineFunction())) {
3207  ++NumMovwMovt;
3208  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3209  RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3210  } else { // use literal pool for address constant
3211  ARMConstantPoolValue *CPV =
3212  ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3213  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3214  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3215  RelAddr = DAG.getLoad(
3216  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3217  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3218  }
3219  SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3220  SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3221  return Result;
3222  }
3223 
3224  // If we have T2 ops, we can materialize the address directly via movt/movw
3225  // pair. This is always cheaper.
3226  if (Subtarget->useMovt(DAG.getMachineFunction())) {
3227  ++NumMovwMovt;
3228  // FIXME: Once remat is capable of dealing with instructions with register
3229  // operands, expand this into two nodes.
3230  return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3231  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3232  } else {
3233  SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3234  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3235  return DAG.getLoad(
3236  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3237  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3238  }
3239 }
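// With useMovt, the Wrapper node above selects to the familiar pair
// (illustrative asm):
//
//   movw r0, :lower16:var
//   movt r0, :upper16:var
//
// while the fallback is a single pc-relative ldr from the literal pool.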
3240 
3241 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3242  SelectionDAG &DAG) const {
3243  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3244  "ROPI/RWPI not currently supported for Darwin");
3245  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3246  SDLoc dl(Op);
3247  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3248 
3249  if (Subtarget->useMovt(DAG.getMachineFunction()))
3250  ++NumMovwMovt;
3251 
3252  // FIXME: Once remat is capable of dealing with instructions with register
3253  // operands, expand this into multiple nodes
3254  unsigned Wrapper =
3255  isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3256 
3257  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3258  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3259 
3260  if (Subtarget->isGVIndirectSymbol(GV))
3261  Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3262  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3263  return Result;
3264 }
3265 
3266 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3267  SelectionDAG &DAG) const {
3268  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3269  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
3270  "Windows on ARM expects to use movw/movt");
3271  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3272  "ROPI/RWPI not currently supported for Windows");
3273 
3274  const TargetMachine &TM = getTargetMachine();
3275  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3276  ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
3277  if (GV->hasDLLImportStorageClass())
3278  TargetFlags = ARMII::MO_DLLIMPORT;
3279  else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
3280  TargetFlags = ARMII::MO_COFFSTUB;
3281  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3282  SDValue Result;
3283  SDLoc DL(Op);
3284 
3285  ++NumMovwMovt;
3286 
3287  // FIXME: Once remat is capable of dealing with instructions with register
3288  // operands, expand this into two nodes.
3289  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3290  DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
3291  TargetFlags));
3292  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
3293  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3294  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3295  return Result;
3296 }
3297 
3298 SDValue
3299 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3300  SDLoc dl(Op);
3301  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3302  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3303  DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3304  Op.getOperand(1), Val);
3305 }
3306 
3307 SDValue
3308 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3309  SDLoc dl(Op);
3310  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3311  Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3312 }
3313 
3314 SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3315  SelectionDAG &DAG) const {
3316  SDLoc dl(Op);
3317  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3318  Op.getOperand(0));
3319 }
3320 
3321 SDValue
3322 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3323  const ARMSubtarget *Subtarget) const {
3324  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3325  SDLoc dl(Op);
3326  switch (IntNo) {
3327  default: return SDValue(); // Don't custom lower most intrinsics.
3328  case Intrinsic::thread_pointer: {
3329  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3330  return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3331  }
3332  case Intrinsic::eh_sjlj_lsda: {
3333  MachineFunction &MF = DAG.getMachineFunction();
3334  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3335  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3336  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3337  SDValue CPAddr;
3338  bool IsPositionIndependent = isPositionIndependent();
3339  unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3340  ARMConstantPoolValue *CPV =
3341  ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
3342  ARMCP::CPLSDA, PCAdj);
3343  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3344  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3345  SDValue Result = DAG.getLoad(
3346  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3347  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3348 
3349  if (IsPositionIndependent) {
3350  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3351  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3352  }
3353  return Result;
3354  }
3355  case Intrinsic::arm_neon_vabs:
3356  return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3357  Op.getOperand(1));
3358  case Intrinsic::arm_neon_vmulls:
3359  case Intrinsic::arm_neon_vmullu: {
3360  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3361  ? ARMISD::VMULLs : ARMISD::VMULLu;
3362  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3363  Op.getOperand(1), Op.getOperand(2));
3364  }
3365  case Intrinsic::arm_neon_vminnm:
3366  case Intrinsic::arm_neon_vmaxnm: {
3367  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3368  ? ISD::FMINNUM : ISD::FMAXNUM;
3369  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3370  Op.getOperand(1), Op.getOperand(2));
3371  }
3372  case Intrinsic::arm_neon_vminu:
3373  case Intrinsic::arm_neon_vmaxu: {
3374  if (Op.getValueType().isFloatingPoint())
3375  return SDValue();
3376  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3377  ? ISD::UMIN : ISD::UMAX;
3378  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3379  Op.getOperand(1), Op.getOperand(2));
3380  }
3381  case Intrinsic::arm_neon_vmins:
3382  case Intrinsic::arm_neon_vmaxs: {
3383  // v{min,max}s is overloaded between signed integers and floats.
3384  if (!Op.getValueType().isFloatingPoint()) {
3385  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3386  ? ISD::SMIN : ISD::SMAX;
3387  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3388  Op.getOperand(1), Op.getOperand(2));
3389  }
3390  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3391  ? ISD::FMINIMUM : ISD::FMAXIMUM;
3392  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3393  Op.getOperand(1), Op.getOperand(2));
3394  }
3395  case Intrinsic::arm_neon_vtbl1:
3396  return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3397  Op.getOperand(1), Op.getOperand(2));
3398  case Intrinsic::arm_neon_vtbl2:
3399  return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3400  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3401  }
3402 }
3403 
3404  static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3405  const ARMSubtarget *Subtarget) {
3406  SDLoc dl(Op);
3407  ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3408  auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
3409  if (SSID == SyncScope::SingleThread)
3410  return Op;
3411 
3412  if (!Subtarget->hasDataBarrier()) {
3413  // Some ARMv6 CPUs can support data barriers with an MCR instruction.
3414  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3415  // here.
3416  assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3417  "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3418  return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3419  DAG.getConstant(0, dl, MVT::i32));
3420  }
3421 
3422  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3423  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3424  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3425  if (Subtarget->isMClass()) {
3426  // Only a full system barrier exists in the M-class architectures.
3427  Domain = ARM_MB::SY;
3428  } else if (Subtarget->preferISHSTBarriers() &&
3429  Ord == AtomicOrdering::Release) {
3430  // Swift happens to implement ISHST barriers in a way that's compatible with
3431  // Release semantics but weaker than ISH so we'd be fools not to use
3432  // it. Beware: other processors probably don't!
3433  Domain = ARM_MB::ISHST;
3434  }
3435 
3436  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3437  DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3438  DAG.getConstant(Domain, dl, MVT::i32));
3439 }
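// Illustrative summary of the lowering above (a sketch, not exhaustive):
//   fence seq_cst on an A-class core     -> dmb ish   (Domain = ARM_MB::ISH)
//   fence release on a core preferring
//   ISHST barriers (e.g. Swift)          -> dmb ishst (Domain = ARM_MB::ISHST)
//   fence on an M-class core             -> dmb sy    (only full-system domain)
//   fence syncscope("singlethread")      -> returned unchanged; a single-thread
//                                           fence needs no hardware barrier.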
3440 
3441  static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3442  const ARMSubtarget *Subtarget) {
3443  // ARM pre-v5TE and Thumb1 do not have preload instructions.
3444  if (!(Subtarget->isThumb2() ||
3445  (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3446  // Just preserve the chain.
3447  return Op.getOperand(0);
3448 
3449  SDLoc dl(Op);
3450  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3451  if (!isRead &&
3452  (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3453  // ARMv7 with MP extension has PLDW.
3454  return Op.getOperand(0);
3455 
3456  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3457  if (Subtarget->isThumb()) {
3458  // Invert the bits.
3459  isRead = ~isRead & 1;
3460  isData = ~isData & 1;
3461  }
3462 
3463  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3464  Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3465  DAG.getConstant(isData, dl, MVT::i32));
3466 }
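// For illustration: a write prefetch such as __builtin_prefetch(p, /*rw=*/1)
// arrives with isRead == 0, so unless the target is ARMv7 with the MP
// extension (which provides PLDW) the node is dropped and only the chain
// survives; in Thumb mode the isRead/isData bits are inverted before the
// ARMISD::PRELOAD node is built, as the code above shows.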
3467 
3468  static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3469  MachineFunction &MF = DAG.getMachineFunction();
3470  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3471 
3472  // vastart just stores the address of the VarArgsFrameIndex slot into the
3473  // memory location argument.
3474  SDLoc dl(Op);
3475  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3476  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3477  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3478  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3479  MachinePointerInfo(SV));
3480 }
3481 
3482 SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3483  CCValAssign &NextVA,
3484  SDValue &Root,
3485  SelectionDAG &DAG,
3486  const SDLoc &dl) const {
3487  MachineFunction &MF = DAG.getMachineFunction();
3488  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3489 
3490  const TargetRegisterClass *RC;
3491  if (AFI->isThumb1OnlyFunction())
3492  RC = &ARM::tGPRRegClass;
3493  else
3494  RC = &ARM::GPRRegClass;
3495 
3496  // Transform the arguments stored in physical registers into virtual ones.
3497  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3498  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3499 
3500  SDValue ArgValue2;
3501  if (NextVA.isMemLoc()) {
3502  MachineFrameInfo &MFI = MF.getFrameInfo();
3503  int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3504 
3505  // Create load node to retrieve arguments from the stack.
3506  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3507  ArgValue2 = DAG.getLoad(
3508  MVT::i32, dl, Root, FIN,
3509  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3510  } else {
3511  Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3512  ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3513  }
3514  if (!Subtarget->isLittle())
3515  std::swap (ArgValue, ArgValue2);
3516  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3517 }
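// For illustration: an f64 formal argument whose halves landed in r0/r1 is
// rebuilt as VMOVDRR(r0, r1) on little-endian targets; on big-endian targets
// the two i32 halves are swapped first so that VMOVDRR still receives
// (low word, high word). If the second half was passed on the stack, it is
// reloaded from a fixed frame object instead of a register.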
3518 
3519 // The remaining GPRs hold either the beginning of variable-argument
3520 // data, or the beginning of an aggregate passed by value (usually
3521 // byval). Either way, we allocate stack slots adjacent to the data
3522 // provided by our caller, and store the unallocated registers there.
3523 // If this is a variadic function, the va_list pointer will begin with
3524 // these values; otherwise, this reassembles a (byval) structure that
3525 // was split between registers and memory.
3526  // Return: the frame index the registers were stored into.
3527 int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3528  const SDLoc &dl, SDValue &Chain,
3529  const Value *OrigArg,
3530  unsigned InRegsParamRecordIdx,
3531  int ArgOffset, unsigned ArgSize) const {
3532  // Currently, two use-cases are possible:
3533  // Case #1. Non-var-args function, and we meet the first byval parameter.
3534  // Set up the first unallocated register as the first byval register;
3535  // eat all remaining registers
3536  // (these two actions are performed by the HandleByVal method).
3537  // Then, here, we initialize the stack frame with
3538  // "store-reg" instructions.
3539  // Case #2. Var-args function that doesn't contain byval parameters.
3540  // The same: eat all remaining unallocated registers,
3541  // initialize the stack frame.
3542 
3543  MachineFunction &MF = DAG.getMachineFunction();
3544  MachineFrameInfo &MFI = MF.getFrameInfo();
3545  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3546  unsigned RBegin, REnd;
3547  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3548  CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3549  } else {
3550  unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3551  RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3552  REnd = ARM::R4;
3553  }
3554 
3555  if (REnd != RBegin)
3556  ArgOffset = -4 * (ARM::R4 - RBegin);
3557 
3558  auto PtrVT = getPointerTy(DAG.getDataLayout());
3559  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3560  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3561 
3562  SmallVector<SDValue, 4> MemOps;
3563  const TargetRegisterClass *RC =
3564  AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3565 
3566  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3567  unsigned VReg = MF.addLiveIn(Reg, RC);
3568  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3569  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3570  MachinePointerInfo(OrigArg, 4 * i));
3571  MemOps.push_back(Store);
3572  FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3573  }
3574 
3575  if (!MemOps.empty())
3576  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3577  return FrameIndex;
3578 }
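// Worked example (illustrative): a 12-byte byval argument whose first words
// were assigned to r2 and r3 gives RBegin = r2 and REnd = r4, so ArgOffset
// becomes -4 * (r4 - r2) = -8. The loop above stores r2/r3 into the fixed
// object at offsets -8 and -4, and the remaining 4 bytes already sit at
// offset 0 in the caller's outgoing-argument area, so the aggregate is
// contiguous in memory again.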
3579 
3580  // Set up the stack frame that the va_list pointer will start from.
3581 void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3582  const SDLoc &dl, SDValue &Chain,
3583  unsigned ArgOffset,
3584  unsigned TotalArgRegsSaveSize,
3585  bool ForceMutable) const {
3586  MachineFunction &MF = DAG.getMachineFunction();
3587  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3588 
3589  // Try to store any remaining integer argument regs
3590  // to their spots on the stack so that they may be loaded by dereferencing
3591  // the result of va_next.
3592  // If there are no regs to be stored, just point the address past the last
3593  // argument passed via the stack.
3594  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3595  CCInfo.getInRegsParamsCount(),
3596  CCInfo.getNextStackOffset(), 4);
3597  AFI->setVarArgsFrameIndex(FrameIndex);
3598 }
3599 
3600 SDValue ARMTargetLowering::LowerFormalArguments(
3601  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3602  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3603  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3604  MachineFunction &MF = DAG.getMachineFunction();
3605  MachineFrameInfo &MFI = MF.getFrameInfo();
3606 
3607  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3608 
3609  // Assign locations to all of the incoming arguments.
3610  SmallVector<CCValAssign, 16> ArgLocs;
3611  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3612  *DAG.getContext());
3613  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3614 
3615  SmallVector<SDValue, 16> ArgValues;
3616  SDValue ArgValue;
3617  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3618  unsigned CurArgIdx = 0;
3619 
3620  // Initially ArgRegsSaveSize is zero.
3621  // Then we increase this value each time we meet a byval parameter.
3622  // We also increase this value in the case of a varargs function.
3623  AFI->setArgRegsSaveSize(0);
3624 
3625  // Calculate the amount of stack space that we need to allocate to store
3626  // byval and variadic arguments that are passed in registers.
3627  // We need to know this before we allocate the first byval or variadic
3628  // argument, as they will be allocated a stack slot below the CFA (Canonical
3629  // Frame Address, the stack pointer at entry to the function).
3630  unsigned ArgRegBegin = ARM::R4;
3631  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3632  if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3633  break;
3634 
3635  CCValAssign &VA = ArgLocs[i];
3636  unsigned Index = VA.getValNo();
3637  ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3638  if (!Flags.isByVal())
3639  continue;
3640 
3641  assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3642  unsigned RBegin, REnd;
3643  CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3644  ArgRegBegin = std::min(ArgRegBegin, RBegin);
3645 
3646  CCInfo.nextInRegsParam();
3647  }
3648  CCInfo.rewindByValRegsInfo();
3649 
3650  int lastInsIndex = -1;
3651  if (isVarArg && MFI.hasVAStart()) {
3652  unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3653  if (RegIdx != array_lengthof(GPRArgRegs))
3654  ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3655  }
3656 
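// For illustration: if the lowest register holding byval or variadic data is
// r2, then TotalArgRegsSaveSize = 4 * (r4 - r2) = 8 bytes, i.e. room to
// spill r2 and r3 immediately below the CFA.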
3657  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3658  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3659  auto PtrVT = getPointerTy(DAG.getDataLayout());
3660 
3661  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3662  CCValAssign &VA = ArgLocs[i];
3663  if (Ins[VA.getValNo()].isOrigArg()) {
3664  std::advance(CurOrigArg,
3665  Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3666  CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3667  }
3668  // Arguments stored in registers.
3669  if (VA.isRegLoc()) {
3670  EVT RegVT = VA.getLocVT();
3671 
3672  if (VA.needsCustom()) {
3673  // f64 and vector types are split up into multiple registers or
3674  // combinations of registers and stack slots.
3675  if (VA.getLocVT() == MVT::v2f64) {
3676  SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3677  Chain, DAG, dl);
3678  VA = ArgLocs[++i]; // skip ahead to next loc
3679  SDValue ArgValue2;
3680  if (VA.isMemLoc()) {
3681  int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3682  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3683  ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3684  MachinePointerInfo::getFixedStack(
3685  DAG.getMachineFunction(), FI));
3686  } else {
3687  ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3688  Chain, DAG, dl);
3689  }
3690  ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3691  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3692  ArgValue, ArgValue1,
3693  DAG.getIntPtrConstant(0, dl));
3694  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3695  ArgValue, ArgValue2,
3696  DAG.getIntPtrConstant(1, dl));
3697  } else
3698  ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3699  } else {
3700  const TargetRegisterClass *RC;
3701 
3702 
3703  if (RegVT == MVT::f16)
3704  RC = &ARM::HPRRegClass;
3705  else if (RegVT == MVT::f32)
3706  RC = &ARM::SPRRegClass;
3707  else if (RegVT == MVT::f64 || RegVT == MVT::v4f16)
3708  RC = &ARM::DPRRegClass;
3709  else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16)
3710  RC = &ARM::QPRRegClass;
3711  else if (RegVT == MVT::i32)
3712  RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3713  : &ARM::GPRRegClass;
3714  else
3715  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3716 
3717  // Transform the arguments in physical registers into virtual ones.
3718  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3719  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3720  }
3721 
3722  // If this is an 8 or 16-bit value, it is really passed promoted
3723  // to 32 bits. Insert an assert[sz]ext to capture this, then
3724  // truncate to the right size.
3725  switch (VA.getLocInfo()) {
3726  default: llvm_unreachable("Unknown loc info!");
3727  case CCValAssign::Full: break;
3728  case CCValAssign::BCvt:
3729  ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3730  break;
3731  case CCValAssign::SExt:
3732  ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3733  DAG.getValueType(VA.getValVT()));
3734  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3735  break;
3736  case CCValAssign::ZExt:
3737  ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3738  DAG.getValueType(VA.getValVT()));
3739  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3740  break;
3741  }
3742 
3743  InVals.push_back(ArgValue);
3744  } else { // VA.isRegLoc()
3745  // sanity check
3746  assert(VA.isMemLoc());
3747  assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3748 
3749  int index = VA.getValNo();
3750 
3751  // Some Ins[] entries become multiple ArgLoc[] entries.
3752  // Process them only once.
3753  if (index != lastInsIndex)
3754  {
3755  ISD::ArgFlagsTy Flags = Ins[index].Flags;
3756  // FIXME: For now, all byval parameter objects are marked mutable.
3757  // This can be changed with more analysis.
3758  // In the case of tail-call optimization, mark all arguments mutable,
3759  // since they could be overwritten by the lowering of arguments in case
3760  // of a tail call.
3761  if (Flags.isByVal()) {
3762  assert(Ins[index].isOrigArg() &&
3763  "Byval arguments cannot be implicit");
3764  unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3765 
3766  int FrameIndex = StoreByValRegs(
3767  CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3768  VA.getLocMemOffset(), Flags.getByValSize());
3769  InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3770  CCInfo.nextInRegsParam();
3771  } else {
3772  unsigned FIOffset = VA.getLocMemOffset();
3773  int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3774  FIOffset, true);
3775 
3776  // Create load nodes to retrieve arguments from the stack.
3777  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3778  InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3779  MachinePointerInfo::getFixedStack(
3780  DAG.getMachineFunction(), FI)));
3781  }
3782  lastInsIndex = index;
3783  }
3784  }
3785  }
3786 
3787  // varargs
3788  if (isVarArg && MFI.hasVAStart())
3789  VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3790  CCInfo.getNextStackOffset(),
3791  TotalArgRegsSaveSize);
3792 
3793  AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3794 
3795  return Chain;
3796 }
3797 
3798 /// isFloatingPointZero - Return true if this is +0.0.
3799 static bool isFloatingPointZero(SDValue Op) {
3800  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3801  return CFP->getValueAPF().isPosZero();
3802  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3803  // Maybe this has already been legalized into the constant pool?
3804  if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3805  SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3806  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3807  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3808  return CFP->getValueAPF().isPosZero();
3809  }
3810  } else if (Op->getOpcode() == ISD::BITCAST &&
3811  Op->getValueType(0) == MVT::f64) {
3812  // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3813  // created by LowerConstantFP().
3814  SDValue BitcastOp = Op->getOperand(0);
3815  if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3816  isNullConstant(BitcastOp->getOperand(0)))
3817  return true;
3818  }
3819  return false;
3820 }
3821 
3822 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
3823 /// the given operands.
3824 SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3825  SDValue &ARMcc, SelectionDAG &DAG,
3826  const SDLoc &dl) const {
3827  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3828  unsigned C = RHSC->getZExtValue();
3829  if (!isLegalICmpImmediate((int32_t)C)) {
3830  // Constant does not fit, try adjusting it by one.
3831  switch (CC) {
3832  default: break;
3833  case ISD::SETLT:
3834  case ISD::SETGE:
3835  if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3836  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3837  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3838  }
3839  break;
3840  case ISD::SETULT:
3841  case ISD::SETUGE:
3842  if (C != 0 && isLegalICmpImmediate(C-1)) {
3843  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3844  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3845  }
3846  break;
3847  case ISD::SETLE:
3848  case ISD::SETGT:
3849  if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3850  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3851  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3852  }
3853  break;
3854  case ISD::SETULE:
3855  case ISD::SETUGT:
3856  if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3857  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3858  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3859  }
3860  break;
3861  }
3862  }
3863  } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
3864  (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
3865  // In ARM and Thumb-2, the compare instructions can shift their second
3866  // operand.
3867  CC = ISD::getSetCCSwappedOperands(CC);
3868  std::swap(LHS, RHS);
3869  }
3870 
3871  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3872  ARMISD::NodeType CompareType;
3873  switch (CondCode) {
3874  default:
3875  CompareType = ARMISD::CMP;
3876  break;
3877  case ARMCC::EQ:
3878  case ARMCC::NE:
3879  // Uses only Z Flag
3880  CompareType = ARMISD::CMPZ;
3881  break;
3882  }
3883  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3884  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3885 }
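// For illustration: a constant such as 0x101 is not a legal compare immediate
// (its nine significant bits cannot be encoded as a rotated 8-bit value), so
// the adjustment above rewrites (x < 0x101, SETLT) into the equivalent
// (x <= 0x100, SETLE); 0x100 encodes fine.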
3886 
3887  /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3888 SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
3889  SelectionDAG &DAG, const SDLoc &dl,
3890  bool InvalidOnQNaN) const {
3891  assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3892  SDValue Cmp;
3893  SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
3894  if (!isFloatingPointZero(RHS))
3895  Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
3896  else
3897  Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
3898  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3899 }
3900 
3901 /// duplicateCmp - Glue values can have only one use, so this function
3902 /// duplicates a comparison node.
3903 SDValue
3904 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3905  unsigned Opc = Cmp.getOpcode();
3906  SDLoc DL(Cmp);
3907  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3908  return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3909 
3910  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3911  Cmp = Cmp.getOperand(0);
3912  Opc = Cmp.getOpcode();
3913  if (Opc == ARMISD::CMPFP)
3914  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3915  Cmp.getOperand(1), Cmp.getOperand(2));
3916  else {
3917  assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3918  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3919  Cmp.getOperand(1));
3920  }
3921  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3922 }
3923 
3924 // This function returns three things: the arithmetic computation itself
3925 // (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
3926 // comparison and the condition code define the case in which the arithmetic
3927 // computation *does not* overflow.
3928 std::pair<SDValue, SDValue>
3929 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3930  SDValue &ARMcc) const {
3931  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
3932 
3933  SDValue Value, OverflowCmp;
3934  SDValue LHS = Op.getOperand(0);
3935  SDValue RHS = Op.getOperand(1);
3936  SDLoc dl(Op);
3937 
3938  // FIXME: We are currently always generating CMPs because we don't support
3939  // generating CMN through the backend. This is not as good as the natural
3940  // CMP case because it causes a register dependency and cannot be folded
3941  // later.
3942 
3943  switch (Op.getOpcode()) {
3944  default:
3945  llvm_unreachable("Unknown overflow instruction!");
3946  case ISD::SADDO:
3947  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3948  Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3949  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3950  break;
3951  case ISD::UADDO:
3952  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3953  // We use ADDC here to correspond to its use in LowerUnsignedALUO.
3954  // We do not use it in the USUBO case as Value may not be used.
3955  Value = DAG.getNode(ARMISD::ADDC, dl,
3956  DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
3957  .getValue(0);
3958  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3959  break;
3960  case ISD::SSUBO:
3961  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3962  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3963  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3964  break;
3965  case ISD::USUBO:
3966  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3967  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3968  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3969  break;
3970  case ISD::UMULO:
3971  // We generate a UMUL_LOHI and then check if the high word is 0.
3972  ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
3973  Value = DAG.getNode(ISD::UMUL_LOHI, dl,
3974  DAG.getVTList(Op.getValueType(), Op.getValueType()),
3975  LHS, RHS);
3976  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
3977  DAG.getConstant(0, dl, MVT::i32));
3978  Value = Value.getValue(0); // We only want the low 32 bits for the result.
3979  break;
3980  case ISD::SMULO:
3981  // We generate a SMUL_LOHI and then check if all the bits of the high word
3982  // are the same as the sign bit of the low word.
3983  ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
3984  Value = DAG.getNode(ISD::SMUL_LOHI, dl,
3985  DAG.getVTList(Op.getValueType(), Op.getValueType()),
3986  LHS, RHS);
3987  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
3988  DAG.getNode(ISD::SRA, dl, Op.getValueType(),
3989  Value.getValue(0),
3990  DAG.getConstant(31, dl, MVT::i32)));
3991  Value = Value.getValue(0); // We only want the low 32 bits for the result.
3992  break;
3993  } // switch (...)
3994 
3995  return std::make_pair(Value, OverflowCmp);
3996 }
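// For illustration: for ISD::SADDO the pair produced above is
//   Value = (add x, y) and OverflowCmp = (ARMISD::CMP Value, x)
// with ARMcc = VC, i.e. the condition names the *no-overflow* case; callers
// that want to branch on overflow reverse it with
// ARMCC::getOppositeCondition (see LowerBRCOND / LowerBR_CC below).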
3997 
3998 SDValue
3999 ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4000  // Let legalize expand this if it isn't a legal type yet.
4001  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4002  return SDValue();
4003 
4004  SDValue Value, OverflowCmp;
4005  SDValue ARMcc;
4006  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4007  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4008  SDLoc dl(Op);
4009  // We use 0 and 1 as false and true values.
4010  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4011  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4012  EVT VT = Op.getValueType();
4013 
4014  SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
4015  ARMcc, CCR, OverflowCmp);
4016 
4017  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4018  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4019 }
4020 
4021  static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
4022  SelectionDAG &DAG) {
4023  SDLoc DL(BoolCarry);
4024  EVT CarryVT = BoolCarry.getValueType();
4025 
4026  // This converts the boolean value carry into the carry flag by doing
4027  // ARMISD::SUBC Carry, 1
4028  SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
4029  DAG.getVTList(CarryVT, MVT::i32),
4030  BoolCarry, DAG.getConstant(1, DL, CarryVT));
4031  return Carry.getValue(1);
4032 }
4033 
4034  static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
4035  SelectionDAG &DAG) {
4036  SDLoc DL(Flags);
4037 
4038  // Now convert the carry flag into a boolean carry. We do this
4039  // using ARMISD:ADDE 0, 0, Carry
4040  return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
4041  DAG.getConstant(0, DL, MVT::i32),
4042  DAG.getConstant(0, DL, MVT::i32), Flags);
4043 }
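// For illustration: the two helpers above are inverses of each other.
// ADDE(0, 0, flags) materializes the carry flag as a 0/1 value, while
// SUBC(b, 1) turns a 0/1 value back into the flag: 1 - 1 = 0 leaves the
// carry set (no borrow), and 0 - 1 borrows, clearing it.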
4044 
4045 SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
4046  SelectionDAG &DAG) const {
4047  // Let legalize expand this if it isn't a legal type yet.
4048  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4049  return SDValue();
4050 
4051  SDValue LHS = Op.getOperand(0);
4052  SDValue RHS = Op.getOperand(1);
4053  SDLoc dl(Op);
4054 
4055  EVT VT = Op.getValueType();
4056  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4057  SDValue Value;
4058  SDValue Overflow;
4059  switch (Op.getOpcode()) {
4060  default:
4061  llvm_unreachable("Unknown overflow instruction!");
4062  case ISD::UADDO:
4063  Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
4064  // Convert the carry flag into a boolean value.
4065  Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4066  break;
4067  case ISD::USUBO: {
4068  Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
4069  // Convert the carry flag into a boolean value.
4070  Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4071  // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
4072  // value. So compute 1 - C.
4073  Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
4074  DAG.getConstant(1, dl, MVT::i32), Overflow);
4075  break;
4076  }
4077  }
4078 
4079  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4080 }
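// For illustration: USUBO(3, 5) borrows, so ARMISD::SUBC leaves the carry
// flag clear; ConvertCarryFlagToBooleanCarry then yields 0, and the final
// "1 - C" computes Overflow = 1, matching the generic USUBO convention that
// overflow means a borrow occurred.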
4081 
4082 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
4083  SDValue Cond = Op.getOperand(0);
4084  SDValue SelectTrue = Op.getOperand(1);
4085  SDValue SelectFalse = Op.getOperand(2);
4086  SDLoc dl(Op);
4087  unsigned Opc = Cond.getOpcode();
4088 
4089  if (Cond.getResNo() == 1 &&
4090  (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
4091  Opc == ISD::USUBO)) {
4092  if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
4093  return SDValue();
4094 
4095  SDValue Value, OverflowCmp;
4096  SDValue ARMcc;
4097  std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
4098  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4099  EVT VT = Op.getValueType();
4100 
4101  return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
4102  OverflowCmp, DAG);
4103  }
4104 
4105  // Convert:
4106  //
4107  // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
4108  // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
4109  //
4110  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
4111  const ConstantSDNode *CMOVTrue =
4112  dyn_cast<ConstantSDNode>(Cond.getOperand(0));
4113  const ConstantSDNode *CMOVFalse =
4114  dyn_cast<ConstantSDNode>(Cond.getOperand(1));
4115 
4116  if (CMOVTrue && CMOVFalse) {
4117  unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
4118  unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
4119 
4120  SDValue True;
4121  SDValue False;
4122  if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
4123  True = SelectTrue;
4124  False = SelectFalse;
4125  } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
4126  True = SelectFalse;
4127  False = SelectTrue;
4128  }
4129 
4130  if (True.getNode() && False.getNode()) {
4131  EVT VT = Op.getValueType();
4132  SDValue ARMcc = Cond.getOperand(2);
4133  SDValue CCR = Cond.getOperand(3);
4134  SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
4135  assert(True.getValueType() == VT);
4136  return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
4137  }
4138  }
4139  }
4140 
4141  // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
4142  // undefined bits before doing a full-word comparison with zero.
4143  Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
4144  DAG.getConstant(1, dl, Cond.getValueType()));
4145 
4146  return DAG.getSelectCC(dl, Cond,
4147  DAG.getConstant(0, dl, Cond.getValueType()),
4148  SelectTrue, SelectFalse, ISD::SETNE);
4149 }
4150 
4151  static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
4152  bool &swpCmpOps, bool &swpVselOps) {
4153  // Start by selecting the GE condition code for opcodes that return true for
4154  // 'equality'
4155  if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
4156  CC == ISD::SETULE)
4157  CondCode = ARMCC::GE;
4158 
4159  // and GT for opcodes that return false for 'equality'.
4160  else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
4161  CC == ISD::SETULT)
4162  CondCode = ARMCC::GT;
4163 
4164  // Since we are constrained to GE/GT, if the opcode contains 'less', we need
4165  // to swap the compare operands.
4166  if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
4167  CC == ISD::SETULT)
4168  swpCmpOps = true;
4169 
4170  // Both GT and GE are ordered comparisons, and return false for 'unordered'.
4171  // If we have an unordered opcode, we need to swap the operands to the VSEL
4172  // instruction (effectively negating the condition).
4173  //
4174  // This also has the effect of swapping which one of 'less' or 'greater'
4175  // returns true, so we also swap the compare operands. It also switches
4176  // whether we return true for 'equality', so we compensate by picking the
4177  // opposite condition code to our original choice.
4178  if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
4179  CC == ISD::SETUGT) {
4180  swpCmpOps = !swpCmpOps;
4181  swpVselOps = !swpVselOps;
4182  CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
4183  }
4184 
4185  // 'ordered' is 'anything but unordered', so use the VS condition code and
4186  // swap the VSEL operands.
4187  if (CC == ISD::SETO) {
4188  CondCode = ARMCC::VS;
4189  swpVselOps = true;
4190  }
4191 
4192  // 'unordered or not equal' is 'anything but equal', so use the EQ condition
4193  // code and swap the VSEL operands.
4194  if (CC == ISD::SETUNE) {
4195  CondCode = ARMCC::EQ;
4196  swpVselOps = true;
4197  }
4198 }
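// For illustration, tracing CC == ISD::SETULT through the rules above: the
// GT group is chosen, the "contains less" rule requests a compare-operand
// swap, and the unordered rule then cancels that swap, swaps the VSEL
// operands instead, and relaxes GT to GE. The net effect is a select that
// fires on GE with its operands exchanged, which yields the original true
// value exactly when x < y or the operands are unordered.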
4199 
4200 SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
4201  SDValue TrueVal, SDValue ARMcc, SDValue CCR,
4202  SDValue Cmp, SelectionDAG &DAG) const {
4203  if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
4204  FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4205  DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
4206  TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4207  DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
4208 
4209  SDValue TrueLow = TrueVal.getValue(0);
4210  SDValue TrueHigh = TrueVal.getValue(1);
4211  SDValue FalseLow = FalseVal.getValue(0);
4212  SDValue FalseHigh = FalseVal.getValue(1);
4213 
4214  SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
4215  ARMcc, CCR, Cmp);
4216  SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
4217  ARMcc, CCR, duplicateCmp(Cmp, DAG));
4218 
4219  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
4220  } else {
4221  return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
4222  Cmp);
4223  }
4224 }
4225 
4226 static bool isGTorGE(ISD::CondCode CC) {
4227  return CC == ISD::SETGT || CC == ISD::SETGE;
4228 }
4229 
4230 static bool isLTorLE(ISD::CondCode CC) {
4231  return CC == ISD::SETLT || CC == ISD::SETLE;
4232 }
4233 
4234 // See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
4235 // All of these conditions (and their <= and >= counterparts) will do:
4236 // x < k ? k : x
4237 // x > k ? x : k
4238 // k < x ? x : k
4239 // k > x ? k : x
4240 static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
4241  const SDValue TrueVal, const SDValue FalseVal,
4242  const ISD::CondCode CC, const SDValue K) {
4243  return (isGTorGE(CC) &&
4244  ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
4245  (isLTorLE(CC) &&
4246  ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
4247 }
4248 
4249 // Similar to isLowerSaturate(), but checks for upper-saturating conditions.
4250 static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
4251  const SDValue TrueVal, const SDValue FalseVal,
4252  const ISD::CondCode CC, const SDValue K) {
4253  return (isGTorGE(CC) &&
4254  ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
4255  (isLTorLE(CC) &&
4256  ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
4257 }
4258 
4259 // Check if two chained conditionals could be converted into SSAT or USAT.
4260 //
4261 // SSAT can replace a set of two conditional selectors that bound a number to an
4262 // interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
4263 //
4264 // x < -k ? -k : (x > k ? k : x)
4265 // x < -k ? -k : (x < k ? x : k)
4266 // x > -k ? (x > k ? k : x) : -k
4267 // x < k ? (x < -k ? -k : x) : k
4268 // etc.
4269 //
4270  // USAT works similarly to SSAT, but bounds the value to the interval [0, k],
4271  // where k + 1 is a power of 2.
4272 //
4273 // It returns true if the conversion can be done, false otherwise.
4274  // Additionally, the variable is returned in parameter V, the constant in K,
4275  // and usat is set to true if the conditional represents an unsigned saturation.
4276 static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
4277  uint64_t &K, bool &usat) {
4278  SDValue LHS1 = Op.getOperand(0);
4279  SDValue RHS1 = Op.getOperand(1);
4280  SDValue TrueVal1 = Op.getOperand(2);
4281  SDValue FalseVal1 = Op.getOperand(3);
4282  ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4283 
4284  const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
4285  if (Op2.getOpcode() != ISD::SELECT_CC)
4286  return false;
4287 
4288  SDValue LHS2 = Op2.getOperand(0);
4289  SDValue RHS2 = Op2.getOperand(1);
4290  SDValue TrueVal2 = Op2.getOperand(2);
4291  SDValue FalseVal2 = Op2.getOperand(3);
4292  ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
4293 
4294  // Find out which are the constants and which are the variables
4295  // in each conditional
4296  SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
4297  ? &RHS1
4298  : nullptr;
4299  SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
4300  ? &RHS2
4301  : nullptr;
4302  SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
4303  SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
4304  SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
4305  SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
4306 
4307  // We must detect cases where the original operations worked with 16- or
4308  // 8-bit values. In such cases, V2Tmp != V2 because the comparison operations
4309  // must work with sign-extended values but the select operations return
4310  // the original non-extended value.
4311  SDValue V2TmpReg = V2Tmp;
4312  if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
4313  V2TmpReg = V2Tmp->getOperand(0);
4314 
4315  // Check that the registers and the constants have the correct values
4316  // in both conditionals
4317  if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
4318  V2TmpReg != V2)
4319  return false;
4320 
4321  // Figure out which conditional is saturating the lower/upper bound.
4322  const SDValue *LowerCheckOp =
4323  isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4324  ? &Op
4325  : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4326  ? &Op2
4327  : nullptr;
4328  const SDValue *UpperCheckOp =
4329  isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4330  ? &Op
4331  : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4332  ? &Op2
4333  : nullptr;
4334 
4335  if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
4336  return false;
4337 
4338  // Check that the constant in the lower-bound check is
4339  // the opposite of the constant in the upper-bound check
4340  // in 1's complement.
4341  int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
4342  int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
4343  int64_t PosVal = std::max(Val1, Val2);
4344  int64_t NegVal = std::min(Val1, Val2);
4345 
4346  if (((Val1 > Val2 && UpperCheckOp == &Op) ||
4347  (Val1 < Val2 && UpperCheckOp == &Op2)) &&
4348  isPowerOf2_64(PosVal + 1)) {
4349 
4350  // Handle the difference between USAT (unsigned) and SSAT (signed) saturation
4351  if (Val1 == ~Val2)
4352  usat = false;
4353  else if (NegVal == 0)
4354  usat = true;
4355  else
4356  return false;
4357 
4358  V = V2;
4359  K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
4360 
4361  return true;
4362  }
4363 
4364  return false;
4365 }
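// Worked example (illustrative):
//   outer: select_cc x, -128, -128, inner, setlt    ; x < -128 ? -128 : inner
//   inner: select_cc x,  127,  127, x,     setgt    ; x >  127 ?  127 : x
// The helpers identify -128 as the lower and 127 as the upper bound,
// PosVal = 127 satisfies isPowerOf2_64(128), and -128 == ~127, so usat is
// set to false and the caller emits a signed saturation to [-128, 127].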
4366 
4367 // Check if a condition of the type x < k ? k : x can be converted into a
4368 // bit operation instead of conditional moves.
4369 // Currently this is allowed given:
4370 // - The conditions and values match up
4371 // - k is 0 or -1 (all ones)
4372  // This function will not check the last condition; that's up to the caller.
4373  // It returns true if the transformation can be made, and in that case
4374  // returns x in V, and k in SatK.
4375  static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
4376  SDValue &SatK)
4377 {
4378  SDValue LHS = Op.getOperand(0);
4379  SDValue RHS = Op.getOperand(1);
4380  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4381  SDValue TrueVal = Op.getOperand(2);
4382  SDValue FalseVal = Op.getOperand(3);
4383 
4384  SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
4385  ? &RHS
4386  : nullptr;
4387 
4388  // No constant operation in comparison, early out
4389  if (!K)
4390  return false;
4391 
4392  SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
4393  V = (KTmp == TrueVal) ? FalseVal : TrueVal;
4394  SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
4395 
4396  // If the constant in the comparison does not match the constant in the
4397  // select, or likewise the variable, early out.
4398  if (*K != KTmp || V != VTmp)
4399  return false;
4400 
4401  if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
4402  SatK = *K;
4403  return true;
4404  }
4405 
4406  return false;
4407 }
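// For illustration: with k == 0 the accepted pattern is max(x, 0), which
// LowerSELECT_CC below rewrites as x & ~(x >> 31); the arithmetic shift
// replicates the sign bit, so a negative x is masked to zero. With k == -1,
// max(x, -1) becomes x | (x >> 31). Both forms avoid a CMP plus a
// conditional move.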
4408 
4409 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
4410  EVT VT = Op.getValueType();
4411  SDLoc dl(Op);
4412 
4413  // Try to convert two saturating conditional selects into a single SSAT
4414  SDValue SatValue;
4415  uint64_t SatConstant;
4416  bool SatUSat;
4417  if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
4418  isSaturatingConditional(Op, SatValue, SatConstant, SatUSat)) {
4419  if (SatUSat)
4420  return DAG.getNode(ARMISD::USAT, dl, VT, SatValue,
4421  DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4422  else
4423  return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
4424  DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4425  }
4426 
4427  // Try to convert expressions of the form x < k ? k : x (and similar forms)
4428  // into more efficient bit operations, which is possible when k is 0 or -1
4429  // On ARM and Thumb-2 which have flexible operand 2 this will result in
4430  // single instructions. On Thumb the shift and the bit operation will be two
4431  // instructions.
4432  // Only allow this transformation on full-width (32-bit) operations
4433  SDValue LowerSatConstant;
4434  if (VT == MVT::i32 &&
4435  isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
4436  SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
4437  DAG.getConstant(31, dl, VT));
4438  if (isNullConstant(LowerSatConstant)) {
4439  SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
4440  DAG.getAllOnesConstant(dl, VT));
4441  return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
4442  } else if (isAllOnesConstant(LowerSatConstant))
4443  return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
4444  }
4445 
4446  SDValue LHS = Op.getOperand(0);
4447  SDValue RHS = Op.getOperand(1);
4448  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4449  SDValue TrueVal = Op.getOperand(2);
4450  SDValue FalseVal = Op.getOperand(3);
4451 
4452  if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4453  DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4454  dl);
4455 
4456  // If softenSetCCOperands only returned one value, we should compare it to
4457  // zero.
4458  if (!RHS.getNode()) {
4459  RHS = DAG.getConstant(0, dl, LHS.getValueType());
4460  CC = ISD::SETNE;
4461  }
4462  }
4463 
4464  if (LHS.getValueType() == MVT::i32) {
4465  // Try to generate VSEL on ARMv8.
4466  // The VSEL instruction can't use all the usual ARM condition
4467  // codes: it only has two bits to select the condition code, so it's
4468  // constrained to use only GE, GT, VS and EQ.
4469  //
4470  // To implement all the various ISD::SETXXX opcodes, we sometimes need to
4471  // swap the operands of the previous compare instruction (effectively
4472  // inverting the compare condition, swapping 'less' and 'greater') and
4473  // sometimes need to swap the operands to the VSEL (which inverts the
4474  // condition in the sense of firing whenever the previous condition didn't)
4475  if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
4476  TrueVal.getValueType() == MVT::f64)) {
4477  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4478  if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
4479  CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
4480  CC = ISD::getSetCCInverse(CC, true);
4481  std::swap(TrueVal, FalseVal);
4482  }
4483  }
4484 
4485  SDValue ARMcc;
4486  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4487  SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4488  return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4489  }
4490 
4491  ARMCC::CondCodes CondCode, CondCode2;
4492  bool InvalidOnQNaN;
4493  FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4494 
4495  // Normalize the fp compare. If RHS is zero we keep it there so we match
4496  // CMPFPw0 instead of CMPFP.
4497  if (Subtarget->hasFPARMv8() && !isFloatingPointZero(RHS) &&
4498  (TrueVal.getValueType() == MVT::f16 ||
4499  TrueVal.getValueType() == MVT::f32 ||
4500  TrueVal.getValueType() == MVT::f64)) {
4501  bool swpCmpOps = false;
4502  bool swpVselOps = false;
4503  checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
4504 
4505  if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
4506  CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
4507  if (swpCmpOps)
4508  std::swap(LHS, RHS);
4509  if (swpVselOps)
4510  std::swap(TrueVal, FalseVal);
4511  }
4512  }
4513 
4514  SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4515  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4516  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4517  SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4518  if (CondCode2 != ARMCC::AL) {
4519  SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
4520  // FIXME: Needs another CMP because flag can have but one use.
4521  SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4522  Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
4523  }
4524  return Result;
4525 }
4526 
4527 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
4528 /// to morph to an integer compare sequence.
4529 static bool canChangeToInt(SDValue Op, bool &SeenZero,
4530  const ARMSubtarget *Subtarget) {
4531  SDNode *N = Op.getNode();
4532  if (!N->hasOneUse())
4533  // Otherwise it requires moving the value from fp to integer registers.
4534  return false;
4535  if (!N->getNumValues())
4536  return false;
4537  EVT VT = Op.getValueType();
4538  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
4539  // f32 case is generally profitable. f64 case only makes sense when vcmpe +
4540  // vmrs are very slow, e.g. cortex-a8.
4541  return false;
4542 
4543  if (isFloatingPointZero(Op)) {
4544  SeenZero = true;
4545  return true;
4546  }
4547  return ISD::isNormalLoad(N);
4548 }
4549 
4550  static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
4551  if (isFloatingPointZero(Op))
4552  return DAG.getConstant(0, SDLoc(Op), MVT::i32);
4553 
4554  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
4555  return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
4556  Ld->getPointerInfo(), Ld->getAlignment(),
4557  Ld->getMemOperand()->getFlags());
4558 
4559  llvm_unreachable("Unknown VFP cmp argument!");
4560 }
4561 
4562 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
4563  SDValue &RetVal1, SDValue &RetVal2) {
4564  SDLoc dl(Op);
4565 
4566  if (isFloatingPointZero(Op)) {
4567  RetVal1 = DAG.getConstant(0, dl, MVT::i32);
4568  RetVal2 = DAG.getConstant(0, dl, MVT::i32);
4569  return;
4570  }
4571 
4572  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
4573  SDValue Ptr = Ld->getBasePtr();
4574  RetVal1 =
4575  DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
4576  Ld->getAlignment(), Ld->getMemOperand()->getFlags());
4577 
4578  EVT PtrType = Ptr.getValueType();
4579  unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
4580  SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
4581  PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
4582  RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
4583  Ld->getPointerInfo().getWithOffset(4), NewAlign,
4584  Ld->getMemOperand()->getFlags());
4585  return;
4586  }
4587 
4588  llvm_unreachable("Unknown VFP cmp argument!");
4589 }
4590 
4591 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
4592 /// f32 and even f64 comparisons to integer ones.
4593 SDValue
4594 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
4595  SDValue Chain = Op.getOperand(0);
4596  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4597  SDValue LHS = Op.getOperand(2);
4598  SDValue RHS = Op.getOperand(3);
4599  SDValue Dest = Op.getOperand(4);
4600  SDLoc dl(Op);
4601 
4602  bool LHSSeenZero = false;
4603  bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
4604  bool RHSSeenZero = false;
4605  bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
4606  if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
4607  // If unsafe fp math optimization is enabled and there are no other uses of
4608  // the CMP operands, and the condition code is EQ or NE, we can optimize it
4609  // to an integer comparison.
4610  if (CC == ISD::SETOEQ)
4611  CC = ISD::SETEQ;
4612  else if (CC == ISD::SETUNE)
4613  CC = ISD::SETNE;
4614 
4615  SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4616  SDValue ARMcc;
4617  if (LHS.getValueType() == MVT::f32) {
4618  LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4619  bitcastf32Toi32(LHS, DAG), Mask);
4620  RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4621  bitcastf32Toi32(RHS, DAG), Mask);
4622  SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4623  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4624  return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4625  Chain, Dest, ARMcc, CCR, Cmp);
4626  }
4627 
4628  SDValue LHS1, LHS2;
4629  SDValue RHS1, RHS2;
4630  expandf64Toi32(LHS, DAG, LHS1, LHS2);
4631  expandf64Toi32(RHS, DAG, RHS1, RHS2);
4632  LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
4633  RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
4634  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4635  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4636  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4637  SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
4638  return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
4639  }
4640 
4641  return SDValue();
4642 }
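// For illustration: under unsafe-fp-math, "brcond (setoeq f32 %x, %y)" where
// one operand is known to be +0.0 becomes an integer test: both operands are
// re-read as i32 and masked with 0x7fffffff so the sign bit is ignored
// (keeping -0.0 == +0.0), then compared with a plain integer CMP, avoiding a
// VFP compare plus the FMSTAT status transfer.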
4643 
4644 SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
4645  SDValue Chain = Op.getOperand(0);
4646  SDValue Cond = Op.getOperand(1);
4647  SDValue Dest = Op.getOperand(2);
4648  SDLoc dl(Op);
4649 
4650  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
4651  // instruction.
4652  unsigned Opc = Cond.getOpcode();
4653  bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
4654  !Subtarget->isThumb1Only();
4655  if (Cond.getResNo() == 1 &&
4656  (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
4657  Opc == ISD::USUBO || OptimizeMul)) {
4658  // Only lower legal XALUO ops.
4659  if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
4660  return SDValue();
4661 
4662  // The actual operation with overflow check.
4663  SDValue Value, OverflowCmp;
4664  SDValue ARMcc;
4665  std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
4666 
4667  // Reverse the condition code.
4668  ARMCC::CondCodes CondCode =
4669  (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
4670  CondCode = ARMCC::getOppositeCondition(CondCode);
4671  ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
4672  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4673 
4674  return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
4675  OverflowCmp);
4676  }
4677 
4678  return SDValue();
4679 }
4680 
4681 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
4682  SDValue Chain = Op.getOperand(0);
4683  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4684  SDValue LHS = Op.getOperand(2);
4685  SDValue RHS = Op.getOperand(3);
4686  SDValue Dest = Op.getOperand(4);
4687  SDLoc dl(Op);
4688 
4689  if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4690  DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4691  dl);
4692 
4693  // If softenSetCCOperands only returned one value, we should compare it to
4694  // zero.
4695  if (!RHS.getNode()) {
4696  RHS = DAG.getConstant(0, dl, LHS.getValueType());
4697  CC = ISD::SETNE;
4698  }
4699  }
4700 
4701  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
4702  // instruction.
4703  unsigned Opc = LHS.getOpcode();
4704  bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
4705  !Subtarget->isThumb1Only();
4706  if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
4707  (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
4708  Opc == ISD::USUBO || OptimizeMul) &&
4709  (CC == ISD::SETEQ || CC == ISD::SETNE)) {
4710  // Only lower legal XALUO ops.
4711  if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
4712  return SDValue();
4713 
4714  // The actual operation with overflow check.
4715  SDValue Value, OverflowCmp;
4716  SDValue ARMcc;
4717  std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
4718 
4719  if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
4720  // Reverse the condition code.
4721  ARMCC::CondCodes CondCode =
4722  (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
4723  CondCode = ARMCC::getOppositeCondition(CondCode);
4724  ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
4725  }
4726  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4727 
4728  return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
4729  OverflowCmp);
4730  }
4731 
4732  if (LHS.getValueType() == MVT::i32) {
4733  SDValue ARMcc;
4734  SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4735  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4736  return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4737  Chain, Dest, ARMcc, CCR, Cmp);
4738  }
4739 
4740  if (getTargetMachine().Options.UnsafeFPMath &&
4741  (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
4742  CC == ISD::SETNE || CC == ISD::SETUNE)) {
4743  if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
4744  return Result;
4745  }
4746 
4747  ARMCC::CondCodes CondCode, CondCode2;
4748  bool InvalidOnQNaN;
4749  FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4750 
4751  SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4752  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4753  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4754  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4755  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
4756  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4757  if (CondCode2 != ARMCC::AL) {
4758  ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
4759  SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
4760  Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4761  }
4762  return Res;
4763 }
4764 
4765 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
4766  SDValue Chain = Op.getOperand(0);
4767  SDValue Table = Op.getOperand(1);
4768  SDValue Index = Op.getOperand(2);
4769  SDLoc dl(Op);
4770 
4771  EVT PTy = getPointerTy(DAG.getDataLayout());
4772  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
4773  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
4774  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
4775  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
4776  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
4777  if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
4778  // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump
4779  // table, which does another jump to the destination. This also makes it
4780  // easier to translate it to TBB / TBH later (Thumb2 only).
4781  // FIXME: This might not work if the function is extremely large.
4782  return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
4783  Addr, Op.getOperand(2), JTI);
4784  }
4785  if (isPositionIndependent() || Subtarget->isROPI()) {
4786  Addr =
4787  DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
4788  MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4789  Chain = Addr.getValue(1);
4790  Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
4791  return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4792  } else {
4793  Addr =
4794  DAG.getLoad(PTy, dl, Chain, Addr,
4795  MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4796  Chain = Addr.getValue(1);
4797  return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4798  }
4799 }
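// For illustration: in position-independent code the jump-table entries hold
// offsets, so the lowering above loads the entry and adds the table base back
// before ARMISD::BR_JT; Thumb-2 (and v8-M Baseline) instead uses the
// two-level ARMISD::BR2_JT form, which can later be compressed into TBB/TBH
// when the offsets are small enough.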
4800 
4801  static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
4802  EVT VT = Op.getValueType();
4803  SDLoc dl(Op);
4804 
4805  if (Op.getValueType().getVectorElementType() == MVT::i32) {
4806  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
4807  return Op;
4808  return DAG.UnrollVectorOp(Op.getNode());
4809  }
4810 
4811  const bool HasFullFP16 =
4812  static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
4813 
4814  EVT NewTy;
4815  const EVT OpTy = Op.getOperand(0).getValueType();
4816  if (OpTy == MVT::v4f32)
4817  NewTy = MVT::v4i32;
4818  else if (OpTy == MVT::v4f16 && HasFullFP16)
4819  NewTy = MVT::v4i16;
4820  else if (OpTy == MVT::v8f16 && HasFullFP16)
4821  NewTy = MVT::v8i16;
4822  else
4823  llvm_unreachable("Invalid type for custom lowering!");
4824 
4825  if (VT != MVT::v4i16 && VT != MVT::v8i16)
4826  return DAG.UnrollVectorOp(Op.getNode());
4827 
4828  Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
4829  return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
4830 }
4831 
4832 SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
4833  EVT VT = Op.getValueType();
4834  if (VT.isVector())
4835  return LowerVectorFP_TO_INT(Op, DAG);
4836  if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
4837  RTLIB::Libcall LC;
4838  if (Op.getOpcode() == ISD::FP_TO_SINT)
4839  LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
4840  Op.getValueType());
4841  else
4842  LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
4843  Op.getValueType());
4844  return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4845  /*isSigned*/ false, SDLoc(Op)).first;
4846  }
4847 
4848  return Op;
4849 }
4850 
4851 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
4852  EVT VT = Op.getValueType();
4853  SDLoc dl(Op);
4854 
4855  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
4856  if (VT.getVectorElementType() == MVT::f32)
4857  return Op;
4858  return DAG.UnrollVectorOp(Op.getNode());
4859  }
4860 
4861  assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
4862  Op.getOperand(0).getValueType() == MVT::v8i16) &&
4863  "Invalid type for custom lowering!");
4864 
4865  const bool HasFullFP16 =
4866  static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
4867 
4868  EVT DestVecType;
4869  if (VT == MVT::v4f32)
4870  DestVecType = MVT::v4i32;
4871  else if (VT == MVT::v4f16 && HasFullFP16)
4872  DestVecType = MVT::v4i16;
4873  else if (VT == MVT::v8f16 && HasFullFP16)
4874  DestVecType = MVT::v8i16;
4875  else
4876  return DAG.UnrollVectorOp(Op.getNode());
4877 
4878  unsigned CastOpc;
4879  unsigned Opc;
4880  switch (Op.getOpcode()) {
4881  default: llvm_unreachable("Invalid opcode!");
4882  case ISD::SINT_TO_FP:
4883  CastOpc = ISD::SIGN_EXTEND;
4884  Opc = ISD::SINT_TO_FP;
4885  break;
4886  case ISD::UINT_TO_FP:
4887  CastOpc = ISD::ZERO_EXTEND;
4888  Opc = ISD::UINT_TO_FP;
4889  break;
4890  }
4891 
4892  Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
4893  return DAG.getNode(Opc, dl, VT, Op);
4894 }
4895 
4896 SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
4897  EVT VT = Op.getValueType();
4898  if (VT.isVector())
4899  return LowerVectorINT_TO_FP(Op, DAG);
4900  if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
4901  RTLIB::Libcall LC;
4902  if (Op.getOpcode() == ISD::SINT_TO_FP)
4903  LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
4904  Op.getValueType());
4905  else
4906  LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
4907  Op.getValueType());
4908  return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4909  /*isSigned*/ false, SDLoc(Op)).first;
4910  }
4911 
4912  return Op;
4913 }
4914 
4915 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
4916  // Implement fcopysign with a fabs and a conditional fneg.
4917  SDValue Tmp0 = Op.getOperand(0);
4918  SDValue Tmp1 = Op.getOperand(1);
4919  SDLoc dl(Op);
4920  EVT VT = Op.getValueType();
4921  EVT SrcVT = Tmp1.getValueType();
4922  bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
4923  Tmp0.getOpcode() == ARMISD::VMOVDRR;
4924  bool UseNEON = !InGPR && Subtarget->hasNEON();
4925 
4926  if (UseNEON) {
4927  // Use VBSL to copy the sign bit.
4928  unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
4929  SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
4930  DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
4931  EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
4932  if (VT == MVT::f64)
4933  Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4934  DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
4935  DAG.getConstant(32, dl, MVT::i32));
4936  else /*if (VT == MVT::f32)*/
4937  Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
4938  if (SrcVT == MVT::f32) {
4939  Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
4940  if (VT == MVT::f64)
4941  Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4942  DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
4943  DAG.getConstant(32, dl, MVT::i32));
4944  } else if (VT == MVT::f32)
4945  Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
4946  DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
4947  DAG.getConstant(32, dl, MVT::i32));
4948  Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
4949  Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
4950 
4951  SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
4952  dl, MVT::i32);
4953  AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
4954  SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
4955  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
4956 
4957  SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
4958  DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
4959  DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
4960  if (VT == MVT::f32) {
4961  Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
4962  Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
4963  DAG.getConstant(0, dl, MVT::i32));
4964  } else {
4965  Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
4966  }
4967 
4968  return Res;
4969  }
4970 
4971  // Bitcast operand 1 to i32.
4972  if (SrcVT == MVT::f64)
4973  Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4974  Tmp1).getValue(1);
4975  Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
4976 
4977  // Or in the signbit with integer operations.
4978  SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
4979  SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4980  Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
4981  if (VT == MVT::f32) {
4982  Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
4983  DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
4984  return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4985  DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
4986  }
4987 
4988  // f64: Or the high part with signbit and then combine two parts.
4989  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4990  Tmp0);
4991  SDValue Lo = Tmp0.getValue(0);
4992  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
4993  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
4994  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
4995 }
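// Illustrative worked example (annotation, not part of the LLVM source):
// for f32, copysign(1.0f, -2.0f) evaluates the integer path above as
//   bits(-2.0f) & 0x80000000 = 0xC0000000 & 0x80000000 = 0x80000000
//   bits( 1.0f) & 0x7fffffff = 0x3F800000
//   0x80000000 | 0x3F800000 = 0xBF800000 = bits(-1.0f)
// i.e. the sign bit of Tmp1 is OR'd onto the magnitude bits of Tmp0.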
4996 
4997 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
4998  MachineFunction &MF = DAG.getMachineFunction();
4999  MachineFrameInfo &MFI = MF.getFrameInfo();
5000  MFI.setReturnAddressIsTaken(true);
5001 
5002  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
5003  return SDValue();
5004 
5005  EVT VT = Op.getValueType();
5006  SDLoc dl(Op);
5007  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5008  if (Depth) {
5009  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
5010  SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
5011  return DAG.getLoad(VT, dl, DAG.getEntryNode(),
5012  DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
5013  MachinePointerInfo());
5014  }
5015 
5016  // Return LR, which contains the return address. Mark it an implicit live-in.
5017  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
5018  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
5019 }
5020 
5021 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
5022  const ARMBaseRegisterInfo &ARI =
5023  *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
5024  MachineFunction &MF = DAG.getMachineFunction();
5025  MachineFrameInfo &MFI = MF.getFrameInfo();
5026  MFI.setFrameAddressIsTaken(true);
5027 
5028  EVT VT = Op.getValueType();
5029  SDLoc dl(Op); // FIXME probably not meaningful
5030  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5031  unsigned FrameReg = ARI.getFrameRegister(MF);
5032  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
5033  while (Depth--)
5034  FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
5035  MachinePointerInfo());
5036  return FrameAddr;
5037 }
5038 
5039 // FIXME? Maybe this could be a TableGen attribute on some registers and
5040 // this table could be generated automatically from RegInfo.
5041 unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
5042  SelectionDAG &DAG) const {
5043  unsigned Reg = StringSwitch<unsigned>(RegName)
5044  .Case("sp", ARM::SP)
5045  .Default(0);
5046  if (Reg)
5047  return Reg;
5048  report_fatal_error(Twine("Invalid register name \""
5049  + StringRef(RegName) + "\"."));
5050 }
5051 
5052 // The result is a 64-bit value, so split it into two 32-bit values and
5053 // return them as a pair of values.
5054 static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
5055  SelectionDAG &DAG) {
5056  SDLoc DL(N);
5057 
5058  // This function is only supposed to be called for i64 type destination.
5059  assert(N->getValueType(0) == MVT::i64
5060  && "ExpandREAD_REGISTER called for non-i64 type result.");
5061 
5062  SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
5063  DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
5064  N->getOperand(0),
5065  N->getOperand(1));
5066 
5067  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
5068  Read.getValue(1)));
5069  Results.push_back(Read.getOperand(0));
5070 }
5071 
5072 /// \p BC is a bitcast that is about to be turned into a VMOVDRR.
5073 /// When \p DstVT, the destination type of \p BC, is on the vector
5074 /// register bank and the source of bitcast, \p Op, operates on the same bank,
5075 /// it might be possible to combine them, such that everything stays on the
5076 /// vector register bank.
5077 /// \return The node that would replace \p BC, if the combine
5078 /// is possible.
5079 static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
5080  SelectionDAG &DAG) {
5081  SDValue Op = BC->getOperand(0);
5082  EVT DstVT = BC->getValueType(0);
5083 
5084  // The only vector instruction that can produce a scalar (remember,
5085  // since the bitcast was about to be turned into VMOVDRR, the source
5086  // type is i64) from a vector is EXTRACT_VECTOR_ELT.
5087  // Moreover, we can do this combine only if there is one use.
5088  // Finally, if the destination type is not a vector, there is not
5089  // much point on forcing everything on the vector bank.
5090  if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5091  !Op.hasOneUse())
5092  return SDValue();
5093 
5094  // If the index is not constant, we will introduce an additional
5095  // multiply that will stick.
5096  // Give up in that case.
5097  ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
5098  if (!Index)
5099  return SDValue();
5100  unsigned DstNumElt = DstVT.getVectorNumElements();
5101 
5102  // Compute the new index.
5103  const APInt &APIntIndex = Index->getAPIntValue();
5104  APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
5105  NewIndex *= APIntIndex;
5106  // Check if the new constant index fits into i32.
5107  if (NewIndex.getBitWidth() > 32)
5108  return SDValue();
5109 
5110  // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
5111  // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
5112  SDLoc dl(Op);
5113  SDValue ExtractSrc = Op.getOperand(0);
5114  EVT VecVT = EVT::getVectorVT(
5115  *DAG.getContext(), DstVT.getScalarType(),
5116  ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
5117  SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
5118  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
5119  DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
5120 }
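// Illustrative example (annotation, not part of the LLVM source): with
// DstVT = v2f32 and Op = (i64 extractelt (v2i64 src), 1), DstNumElt = 2 and
// NewIndex = 1 * 2 = 2, so the combine produces
//   (v2f32 extract_subvector (v4f32 bitcast src), 2)
// and the value never leaves the vector register bank.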
5121 
5122 /// ExpandBITCAST - If the target supports VFP, this function is called to
5123 /// expand a bit convert where either the source or destination type is i64 to
5124 /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
5125 /// operand type is illegal (e.g., v2f32 for a target that doesn't support
5126 /// vectors), since the legalizer won't know what to do with that.
5127 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
5128  const ARMSubtarget *Subtarget) {
5129  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5130  SDLoc dl(N);
5131  SDValue Op = N->getOperand(0);
5132 
5133  // This function is only supposed to be called for i64 types, either as the
5134  // source or destination of the bit convert.
5135  EVT SrcVT = Op.getValueType();
5136  EVT DstVT = N->getValueType(0);
5137  const bool HasFullFP16 = Subtarget->hasFullFP16();
5138 
5139  if (SrcVT == MVT::f32 && DstVT == MVT::i32) {
5140  // FullFP16: half values are passed in S-registers, and we don't
5141  // need any of the bitcast and moves:
5142  //
5143  // t2: f32,ch = CopyFromReg t0, Register:f32 %0
5144  // t5: i32 = bitcast t2
5145  // t18: f16 = ARMISD::VMOVhr t5
5146  if (Op.getOpcode() != ISD::CopyFromReg ||
5147  Op.getValueType() != MVT::f32)
5148  return SDValue();
5149 
5150  auto Move = N->use_begin();
5151  if (Move->getOpcode() != ARMISD::VMOVhr)
5152  return SDValue();
5153 
5154  SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
5155  SDValue Copy = DAG.getNode(ISD::CopyFromReg, SDLoc(Op), MVT::f16, Ops);
5156  DAG.ReplaceAllUsesWith(*Move, &Copy);
5157  return Copy;
5158  }
5159 
5160  if (SrcVT == MVT::i16 && DstVT == MVT::f16) {
5161  if (!HasFullFP16)
5162  return SDValue();
5163  // SoftFP: read half-precision arguments:
5164  //
5165  // t2: i32,ch = ...
5166  // t7: i16 = truncate t2 <~~~~ Op
5167  // t8: f16 = bitcast t7 <~~~~ N
5168  //
5169  if (Op.getOperand(0).getValueType() == MVT::i32)
5170  return DAG.getNode(ARMISD::VMOVhr, SDLoc(Op),
5171  MVT::f16, Op.getOperand(0));
5172 
5173  return SDValue();
5174  }
5175 
5176  // Half-precision return values
5177  if (SrcVT == MVT::f16 && DstVT == MVT::i16) {
5178  if (!HasFullFP16)
5179  return SDValue();
5180  //
5181  // t11: f16 = fadd t8, t10
5182  // t12: i16 = bitcast t11 <~~~ SDNode N
5183  // t13: i32 = zero_extend t12
5184  // t16: ch,glue = CopyToReg t0, Register:i32 %r0, t13
5185  // t17: ch = ARMISD::RET_FLAG t16, Register:i32 %r0, t16:1
5186  //
5187  // transform this into:
5188  //
5189  // t20: i32 = ARMISD::VMOVrh t11
5190  // t16: ch,glue = CopyToReg t0, Register:i32 %r0, t20
5191  //
5192  auto ZeroExtend = N->use_begin();
5193  if (N->use_size() != 1 || ZeroExtend->getOpcode() != ISD::ZERO_EXTEND ||
5194  ZeroExtend->getValueType(0) != MVT::i32)
5195  return SDValue();
5196 
5197  auto Copy = ZeroExtend->use_begin();
5198  if (Copy->getOpcode() == ISD::CopyToReg &&
5199  Copy->use_begin()->getOpcode() == ARMISD::RET_FLAG) {
5200  SDValue Cvt = DAG.getNode(ARMISD::VMOVrh, SDLoc(Op), MVT::i32, Op);
5201  DAG.ReplaceAllUsesWith(*ZeroExtend, &Cvt);
5202  return Cvt;
5203  }
5204  return SDValue();
5205  }
5206 
5207  if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
5208  return SDValue();
5209 
5210  // Turn i64->f64 into VMOVDRR.
5211  if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
5212  // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
5213  // if we can combine the bitcast with its source.
5214  if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
5215  return Val;
5216 
5217  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
5218  DAG.getConstant(0, dl, MVT::i32));
5219  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
5220  DAG.getConstant(1, dl, MVT::i32));
5221  return DAG.getNode(ISD::BITCAST, dl, DstVT,
5222  DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
5223  }
5224 
5225  // Turn f64->i64 into VMOVRRD.
5226  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
5227  SDValue Cvt;
5228  if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
5229  SrcVT.getVectorNumElements() > 1)
5230  Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
5231  DAG.getVTList(MVT::i32, MVT::i32),
5232  DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
5233  else
5234  Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
5235  DAG.getVTList(MVT::i32, MVT::i32), Op);
5236  // Merge the pieces into a single i64 value.
5237  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
5238  }
5239 
5240  return SDValue();
5241 }
5242 
5243 /// getZeroVector - Returns a vector of specified type with all zero elements.
5244 /// Zero vectors are used to represent vector negation and in those cases
5245 /// will be implemented with the NEON VNEG instruction. However, VNEG does
5246 /// not support i64 elements, so sometimes the zero vectors will need to be
5247 /// explicitly constructed. Regardless, use a canonical VMOV to create the
5248 /// zero vector.
5249 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5250  assert(VT.isVector() && "Expected a vector type");
5251  // The canonical modified immediate encoding of a zero vector is....0!
5252  SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
5253  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
5254  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
5255  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
5256 }
5257 
5258 /// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
5259 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
5260 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
5261  SelectionDAG &DAG) const {
5262  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5263  EVT VT = Op.getValueType();
5264  unsigned VTBits = VT.getSizeInBits();
5265  SDLoc dl(Op);
5266  SDValue ShOpLo = Op.getOperand(0);
5267  SDValue ShOpHi = Op.getOperand(1);
5268  SDValue ShAmt = Op.getOperand(2);
5269  SDValue ARMcc;
5270  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5271  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
5272 
5273  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
5274 
5275  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
5276  DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
5277  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
5278  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
5279  DAG.getConstant(VTBits, dl, MVT::i32));
5280  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
5281  SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
5282  SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
5283  SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5284  ISD::SETGE, ARMcc, DAG, dl);
5285  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
5286  ARMcc, CCR, CmpLo);
5287 
5288  SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
5289  SDValue HiBigShift = Opc == ISD::SRA
5290  ? DAG.getNode(Opc, dl, VT, ShOpHi,
5291  DAG.getConstant(VTBits - 1, dl, VT))
5292  : DAG.getConstant(0, dl, VT);
5293  SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5294  ISD::SETGE, ARMcc, DAG, dl);
5295  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
5296  ARMcc, CCR, CmpHi);
5297 
5298  SDValue Ops[2] = { Lo, Hi };
5299  return DAG.getMergeValues(Ops, dl);
5300 }
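// Illustrative example (annotation, not part of the LLVM source): for
// VTBits = 32 and ShAmt = 40, ExtraShAmt = 8 >= 0, so both CMOVs select the
// "big shift" results: Lo = ShOpHi >> 8 (arithmetic for SRA_PARTS, logical
// for SRL_PARTS) and Hi = sign-fill or zero. For ShAmt < 32 they select the
// funnel result Lo = (ShOpLo >> ShAmt) | (ShOpHi << (32 - ShAmt)) instead.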
5301 
5302 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
5303 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
5304 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
5305  SelectionDAG &DAG) const {
5306  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5307  EVT VT = Op.getValueType();
5308  unsigned VTBits = VT.getSizeInBits();
5309  SDLoc dl(Op);
5310  SDValue ShOpLo = Op.getOperand(0);
5311  SDValue ShOpHi = Op.getOperand(1);
5312  SDValue ShAmt = Op.getOperand(2);
5313  SDValue ARMcc;
5314  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5315 
5316  assert(Op.getOpcode() == ISD::SHL_PARTS);
5317  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
5318  DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
5319  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
5320  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
5321  SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
5322 
5323  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
5324  DAG.getConstant(VTBits, dl, MVT::i32));
5325  SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
5326  SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5327  ISD::SETGE, ARMcc, DAG, dl);
5328  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
5329  ARMcc, CCR, CmpHi);
5330 
5331  SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5332  ISD::SETGE, ARMcc, DAG, dl);
5333  SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
5334  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
5335  DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
5336 
5337  SDValue Ops[2] = { Lo, Hi };
5338  return DAG.getMergeValues(Ops, dl);
5339 }
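// Illustrative example (annotation, not part of the LLVM source): for
// ShAmt = 40, ExtraShAmt = 8 >= 0 selects Hi = ShOpLo << 8 and Lo = 0;
// for ShAmt < 32 it selects Hi = (ShOpHi << ShAmt) | (ShOpLo >> (32 - ShAmt))
// and Lo = ShOpLo << ShAmt.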
5340 
5341 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
5342  SelectionDAG &DAG) const {
5343  // The rounding mode is in bits 23:22 of the FPSCR.
5344  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
5345  // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
5346  // so that the shift and the AND get folded into a bitfield extract.
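// Worked example (annotation, not part of the LLVM source): with
// FPSCR[23:22] = 0b01 (round towards plus infinity), adding 1 << 22 bumps
// the field to 0b10 and ((FPSCR + (1 << 22)) >> 22) & 3 == 2, FLT_ROUNDS'
// code for round-upward; 0b11 (round towards zero) wraps around to 0,
// matching the 3 -> 0 entry of the mapping above.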
5347  SDLoc dl(Op);
5348  SDValue Ops[] = { DAG.getEntryNode(),
5349  DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
5350 
5351  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
5352  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
5353  DAG.getConstant(1U << 22, dl, MVT::i32));
5354  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
5355  DAG.getConstant(22, dl, MVT::i32));
5356  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
5357  DAG.getConstant(3, dl, MVT::i32));
5358 }
5359 
5360 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
5361  const ARMSubtarget *ST) {
5362  SDLoc dl(N);
5363  EVT VT = N->getValueType(0);
5364  if (VT.isVector()) {
5365  assert(ST->hasNEON());
5366 
5367  // Compute the least significant set bit: LSB = X & -X
5368  SDValue X = N->getOperand(0);
5369  SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
5370  SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
5371 
5372  EVT ElemTy = VT.getVectorElementType();
5373 
5374  if (ElemTy == MVT::i8) {
5375  // Compute with: cttz(x) = ctpop(lsb - 1)
5376  SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5377  DAG.getTargetConstant(1, dl, ElemTy));
5378  SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
5379  return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
5380  }
5381 
5382  if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
5383  (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
5384  // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
5385  unsigned NumBits = ElemTy.getSizeInBits();
5386  SDValue WidthMinus1 =
5387  DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5388  DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
5389  SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
5390  return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
5391  }
5392 
5393  // Compute with: cttz(x) = ctpop(lsb - 1)
5394 
5395  // Compute LSB - 1.
5396  SDValue Bits;
5397  if (ElemTy == MVT::i64) {
5398  // Load constant 0xffff'ffff'ffff'ffff to register.
5399  SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5400  DAG.getTargetConstant(0x1eff, dl, MVT::i32));
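// [Annotation, not in the original source] 0x1eff is a pre-encoded NEON
// modified immediate: OpCmode = 0x1e (the 64-bit per-byte-mask form) with
// Imm = 0xff selecting 0xff for all eight bytes, i.e. an all-ones vector.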
5401  Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
5402  } else {
5403  SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5404  DAG.getTargetConstant(1, dl, ElemTy));
5405  Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
5406  }
5407  return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
5408  }
5409 
5410  if (!ST->hasV6T2Ops())
5411  return SDValue();
5412 
5413  SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
5414  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
5415 }
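// Worked example (annotation, not part of the LLVM source): for
// x = 0b0110'0000, LSB = x & -x = 0b0010'0000. The ctpop path computes
// ctpop(LSB - 1) = ctpop(0b0001'1111) = 5 = cttz(x); the i16/i32 zero-undef
// path computes (width - 1) - ctlz(LSB), which is also 5. The scalar path
// above uses cttz(x) = ctlz(bitreverse(x)) via RBIT + CLZ.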
5416 
5417 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
5418  const ARMSubtarget *ST) {
5419  EVT VT = N->getValueType(0);
5420  SDLoc DL(N);
5421 
5422  assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
5423  assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
5424  VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
5425  "Unexpected type for custom ctpop lowering");
5426 
5427  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5428  EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5429  SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
5430  Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
5431 
5432  // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
5433  unsigned EltSize = 8;
5434  unsigned NumElts = VT.is64BitVector() ? 8 : 16;
5435  while (EltSize != VT.getScalarSizeInBits()) {
5436  SmallVector<SDValue, 8> Ops;
5437  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
5438  TLI.getPointerTy(DAG.getDataLayout())));
5439  Ops.push_back(Res);
5440 
5441  EltSize *= 2;
5442  NumElts /= 2;
5443  MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
5444  Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
5445  }
5446 
5447  return Res;
5448 }
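// Illustrative trace (annotation, not part of the LLVM source): for
// VT = v4i32 the loop runs twice: CTPOP on v16i8 (VCNT.8) gives per-byte
// counts, then vpaddlu widens v16i8 -> v8i16 and v8i16 -> v4i32, summing
// adjacent pairs until the element size matches VT.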
5449 
5450 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
5451  const ARMSubtarget *ST) {
5452  EVT VT = N->getValueType(0);
5453  SDLoc dl(N);
5454 
5455  if (!VT.isVector())
5456  return SDValue();
5457 
5458  // Lower vector shifts on NEON to use VSHL.
5459  assert(ST->hasNEON() && "unexpected vector shift");
5460 
5461  // Left shifts translate directly to the vshiftu intrinsic.
5462  if (N->getOpcode() == ISD::SHL)
5463  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5464  DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
5465  MVT::i32),
5466  N->getOperand(0), N->getOperand(1));
5467 
5468  assert((N->getOpcode() == ISD::SRA ||
5469  N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
5470 
5471  // NEON uses the same intrinsics for both left and right shifts. For
5472  // right shifts, the shift amounts are negative, so negate the vector of
5473  // shift amounts.
5474  EVT ShiftVT = N->getOperand(1).getValueType();
5475  SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
5476  getZeroVector(ShiftVT, DAG, dl),
5477  N->getOperand(1));
5478  Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
5479  Intrinsic::arm_neon_vshifts :
5480  Intrinsic::arm_neon_vshiftu);
5481  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5482  DAG.getConstant(vshiftInt, dl, MVT::i32),
5483  N->getOperand(0), NegatedCount);
5484 }
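// Illustrative example (annotation, not part of the LLVM source): a v4i32
// (srl x, <3,3,3,3>) becomes vshiftu(x, <-3,-3,-3,-3>): NEON's VSHL shifts
// right when a lane's shift count is negative, so only the counts change.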
5485 
5486 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
5487  const ARMSubtarget *ST) {
5488  EVT VT = N->getValueType(0);
5489  SDLoc dl(N);
5490 
5491  // We can get here for a node like i32 = ISD::SHL i32, i64
5492  if (VT != MVT::i64)
5493  return SDValue();
5494 
5495  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
5496  "Unknown shift to lower!");
5497 
5498  // We only lower SRA, SRL of 1 here, all others use generic lowering.
5499  if (!isOneConstant(N->getOperand(1)))
5500  return SDValue();
5501 
5502  // If we are in thumb mode, we don't have RRX.
5503  if (ST->isThumb1Only()) return SDValue();
5504 
5505  // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
5506  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5507  DAG.getConstant(0, dl, MVT::i32));
5508  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5509  DAG.getConstant(1, dl, MVT::i32));
5510 
5511  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one
5512  // and captures the shifted-out bit in the carry flag.
5513  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
5514  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
5515 
5516  // The low part is an ARMISD::RRX operation, which shifts the carry in.
5517  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
5518 
5519  // Merge the pieces into a single i64 value.
5520  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5521 }
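// Illustrative example (annotation, not part of the LLVM source):
// (srl i64 %x, 1) becomes
//   Hi' = SRL_FLAG(Hi)   ; Hi >> 1, shifted-out bit lands in the carry flag
//   Lo' = RRX(Lo)        ; Lo >> 1 with the old carry shifted into bit 31
// so the whole 64-bit shift needs no register-pair shifter.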
5522 
5523 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
5524  SDValue TmpOp0, TmpOp1;
5525  bool Invert = false;
5526  bool Swap = false;
5527  unsigned Opc = 0;
5528 
5529  SDValue Op0 = Op.getOperand(0);
5530  SDValue Op1 = Op.getOperand(1);
5531  SDValue CC = Op.getOperand(2);
5532  EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
5533  EVT VT = Op.getValueType();
5534  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
5535  SDLoc dl(Op);
5536 
5537  if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
5538  (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
5539  // Special-case integer 64-bit equality comparisons. They aren't legal,
5540  // but they can be lowered with a few vector instructions.
5541  unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
5542  EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
5543  SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
5544  SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
5545  SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
5546  DAG.getCondCode(ISD::SETEQ));
5547  SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
5548  SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
5549  Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
5550  if (SetCCOpcode == ISD::SETNE)
5551  Merged = DAG.getNOT(dl, Merged, CmpVT);
5552  Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
5553  return Merged;
5554  }
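// [Annotation, not in the original source] The trick above: VCEQ.I32 only
// compares 32-bit halves, and a 64-bit lane is equal iff both of its halves
// are, so ANDing the compare result with its own VREV64 (half-swapped copy)
// leaves each 64-bit lane either all-ones or all-zeros.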
5555 
5556  if (CmpVT.getVectorElementType() == MVT::i64)
5557  // 64-bit comparisons are not legal in general.
5558  return SDValue();
5559 
5560  if (Op1.getValueType().isFloatingPoint()) {
5561  switch (SetCCOpcode) {
5562  default: llvm_unreachable("Illegal FP comparison");
5563  case ISD::SETUNE:
5564  case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
5565  case ISD::SETOEQ:
5566  case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5567  case ISD::SETOLT:
5568  case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
5569  case ISD::SETOGT:
5570  case ISD::SETGT: Opc = ARMISD::VCGT; break;
5571  case ISD::SETOLE:
5572  case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
5573  case ISD::SETOGE:
5574  case ISD::SETGE: Opc = ARMISD::VCGE; break;
5575  case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
5576  case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
5577  case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
5578  case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
5579  case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
5580  case ISD::SETONE:
5581  // Expand this to (OLT | OGT).
5582  TmpOp0 = Op0;
5583  TmpOp1 = Op1;
5584  Opc = ISD::OR;
5585  Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5586  Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
5587  break;
5588  case ISD::SETUO:
5589  Invert = true;
5590  LLVM_FALLTHROUGH;
5591  case ISD::SETO:
5592  // Expand this to (OLT | OGE).
5593  TmpOp0 = Op0;
5594  TmpOp1 = Op1;
5595  Opc = ISD::OR;
5596  Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5597  Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
5598  break;
5599  }
5600  } else {
5601  // Integer comparisons.
5602  switch (SetCCOpcode) {
5603  default: llvm_unreachable("Illegal integer comparison");
5604  case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
5605  case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5606  case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
5607  case ISD::SETGT: Opc = ARMISD::VCGT; break;
5608  case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
5609  case ISD::SETGE: Opc = ARMISD::VCGE; break;
5610  case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
5611  case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
5612  case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
5613  case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
5614  }
5615 
5616  // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
5617  if (Opc == ARMISD::VCEQ) {
5618  SDValue AndOp;
5619  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5620  AndOp = Op0;
5621  else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
5622  AndOp = Op1;
5623 
5624  // Ignore bitconvert.
5625  if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
5626  AndOp = AndOp.getOperand(0);
5627 
5628  if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
5629  Opc = ARMISD::VTST;
5630  Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
5631  Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
5632  Invert = !Invert;
5633  }
5634  }
5635  }
5636 
5637  if (Swap)
5638  std::swap(Op0, Op1);
5639 
5640  // If one of the operands is a constant vector zero, attempt to fold the
5641  // comparison to a specialized compare-against-zero form.
5642  SDValue SingleOp;
5643  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5644  SingleOp = Op0;
5645  else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
5646  if (Opc == ARMISD::VCGE)
5647  Opc = ARMISD::VCLEZ;
5648  else if (Opc == ARMISD::VCGT)
5649  Opc = ARMISD::VCLTZ;
5650  SingleOp = Op1;
5651  }
5652 
5653  SDValue Result;
5654  if (SingleOp.getNode()) {
5655  switch (Opc) {
5656  case ARMISD::VCEQ:
5657  Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
5658  case ARMISD::VCGE:
5659  Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
5660  case ARMISD::VCLEZ:
5661  Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
5662  case ARMISD::VCGT:
5663  Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
5664  case ARMISD::VCLTZ:
5665  Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
5666  default:
5667  Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5668  }
5669  } else {
5670  Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5671  }
5672 
5673  Result = DAG.getSExtOrTrunc(Result, dl, VT);
5674 
5675  if (Invert)
5676  Result = DAG.getNOT(dl, Result, VT);
5677 
5678  return Result;
5679 }
5680 
5681 static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
5682  SDValue LHS = Op.getOperand(0);
5683  SDValue RHS = Op.getOperand(1);
5684  SDValue Carry = Op.getOperand(2);
5685  SDValue Cond = Op.getOperand(3);
5686  SDLoc DL(Op);
5687 
5688  assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
5689 
5690  // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
5691  // have to invert the carry first.
5692  Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
5693  DAG.getConstant(1, DL, MVT::i32), Carry);
5694  // This converts the boolean value carry into the carry flag.
5695  Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
5696 
5697  SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
5698  SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
5699 
5700  SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
5701  SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
5702  SDValue ARMcc = DAG.getConstant(
5703  IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
5704  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5705  SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
5706  Cmp.getValue(1), SDValue());
5707  return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
5708  CCR, Chain.getValue(1));
5709 }
5710 
5711 /// isNEONModifiedImm - Check if the specified splat value corresponds to a
5712 /// valid vector constant for a NEON instruction with a "modified immediate"
5713 /// operand (e.g., VMOV). If so, return the encoded value.
5714 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
5715  unsigned SplatBitSize, SelectionDAG &DAG,
5716  const SDLoc &dl, EVT &VT, bool is128Bits,
5717  NEONModImmType type) {
5718  unsigned OpCmode, Imm;
5719 
5720  // SplatBitSize is set to the smallest size that splats the vector, so a
5721  // zero vector will always have SplatBitSize == 8. However, NEON modified
5722  // immediate instructions other than VMOV do not support the 8-bit encoding
5723  // of a zero vector, and the default encoding of zero is supposed to be the
5724  // 32-bit version.
5725  if (SplatBits == 0)
5726  SplatBitSize = 32;
5727 
5728  switch (SplatBitSize) {
5729  case 8:
5730  if (type != VMOVModImm)
5731  return SDValue();
5732  // Any 1-byte value is OK. Op=0, Cmode=1110.
5733  assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
5734  OpCmode = 0xe;
5735  Imm = SplatBits;
5736  VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
5737  break;
5738 
5739  case 16:
5740  // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
5741  VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
5742  if ((SplatBits & ~0xff) == 0) {
5743  // Value = 0x00nn: Op=x, Cmode=100x.
5744  OpCmode = 0x8;
5745  Imm = SplatBits;
5746  break;
5747  }
5748  if ((SplatBits & ~0xff00) == 0) {
5749  // Value = 0xnn00: Op=x, Cmode=101x.
5750  OpCmode = 0xa;
5751  Imm = SplatBits >> 8;
5752  break;
5753  }
5754  return SDValue();
5755 
5756  case 32:
5757  // NEON's 32-bit VMOV supports splat values where:
5758  // * only one byte is nonzero, or
5759  // * the least significant byte is 0xff and the second byte is nonzero, or
5760  // * the least significant 2 bytes are 0xff and the third is nonzero.
5761  VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
5762  if ((SplatBits & ~0xff) == 0) {
5763  // Value = 0x000000nn: Op=x, Cmode=000x.
5764  OpCmode = 0;
5765  Imm = SplatBits;
5766  break;
5767  }
5768  if ((SplatBits & ~0xff00) == 0) {
5769  // Value = 0x0000nn00: Op=x, Cmode=001x.
5770  OpCmode = 0x2;
5771  Imm = SplatBits >> 8;
5772  break;
5773  }
5774  if ((SplatBits & ~0xff0000) == 0) {
5775  // Value = 0x00nn0000: Op=x, Cmode=010x.
5776  OpCmode = 0x4;
5777  Imm = SplatBits >> 16;
5778  break;
5779  }
5780  if ((SplatBits & ~0xff000000) == 0) {
5781  // Value = 0xnn000000: Op=x, Cmode=011x.
5782  OpCmode = 0x6;
5783  Imm = SplatBits >> 24;
5784  break;
5785  }
5786 
5787  // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
5788  if (type == OtherModImm) return SDValue();
5789 
5790  if ((SplatBits & ~0xffff) == 0 &&
5791  ((SplatBits | SplatUndef) & 0xff) == 0xff) {
5792  // Value = 0x0000nnff: Op=x, Cmode=1100.
5793  OpCmode = 0xc;
5794  Imm = SplatBits >> 8;
5795  break;
5796  }
5797 
5798  if ((SplatBits & ~0xffffff) == 0 &&
5799  ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
5800  // Value = 0x00nnffff: Op=x, Cmode=1101.
5801  OpCmode = 0xd;
5802  Imm = SplatBits >> 16;
5803  break;
5804  }
5805 
5806  // Note: there are a few 32-bit splat values (specifically: 00ffff00,
5807  // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
5808  // VMOV.I32. A (very) minor optimization would be to replicate the value
5809  // and fall through here to test for a valid 64-bit splat. But, then the
5810  // caller would also need to check and handle the change in size.
5811  return SDValue();
5812 
5813  case 64: {
5814  if (type != VMOVModImm)
5815  return SDValue();
5816  // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
5817  uint64_t BitMask = 0xff;
5818  uint64_t Val = 0;
5819  unsigned ImmMask = 1;
5820  Imm = 0;
5821  for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
5822  if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
5823  Val |= BitMask;
5824  Imm |= ImmMask;
5825  } else if ((SplatBits & BitMask) != 0) {
5826  return SDValue();
5827  }
5828  BitMask <<= 8;
5829  ImmMask <<= 1;
5830  }
5831 
5832  if (DAG.getDataLayout().isBigEndian())
5833  // Swap the higher and lower 32-bit words.
5834  Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
5835 
5836  // Op=1, Cmode=1110.
5837  OpCmode = 0x1e;
5838  VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
5839  break;
5840  }
5841 
5842  default:
5843  llvm_unreachable("unexpected size for isNEONModifiedImm");
5844  }
5845 
5846  unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
5847  return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
5848 }
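// Worked example (annotation, not part of the LLVM source): a v4i32 splat
// of 0x00410000 has only byte 2 nonzero, so it hits the Cmode=010x case:
// OpCmode = 0x4, Imm = 0x41, and createNEONModImm packs them as
// (0x4 << 8) | 0x41 = 0x441, which VMOVIMM later materializes as a
// VMOV.I32 splat of the word 0x00410000.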
5849 
5850 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
5851  const ARMSubtarget *ST) const {
5852  EVT VT = Op.getValueType();
5853  bool IsDouble = (VT == MVT::f64);
5854  ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
5855  const APFloat &FPVal = CFP->getValueAPF();
5856 
5857  // Prevent floating-point constants from using literal loads
5858  // when execute-only is enabled.
5859  if (ST->genExecuteOnly()) {
5860  // If we can represent the constant as an immediate, don't lower it
5861  if (isFPImmLegal(FPVal, VT))
5862  return Op;
5863  // Otherwise, construct as integer, and move to float register
5864  APInt INTVal = FPVal.bitcastToAPInt();
5865  SDLoc DL(CFP);
5866  switch (VT.getSimpleVT().SimpleTy) {
5867  default:
5868  llvm_unreachable("Unknown floating point type!");
5869  break;
5870  case MVT::f64: {
5871  SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
5872  SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
5873  if (!ST->isLittle())
5874  std::swap(Lo, Hi);
5875  return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
5876  }
5877  case MVT::f32:
5878  return DAG.getNode(ARMISD::VMOVSR, DL, VT,
5879  DAG.getConstant(INTVal, DL, MVT::i32));
5880  }
5881  }
5882 
5883  if (!ST->hasVFP3())
5884  return SDValue();
5885 
5886  // Use the default (constant pool) lowering for double constants when we have
5887  // an SP-only FPU
5888  if (IsDouble && Subtarget->isFPOnlySP())
5889  return SDValue();
5890 
5891  // Try splatting with a VMOV.f32...
5892  int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
5893 
5894  if (ImmVal != -1) {
5895  if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
5896  // We have code in place to select a valid ConstantFP already, no need to
5897  // do any mangling.
5898  return Op;
5899  }
5900 
5901  // It's a float and we are trying to use NEON operations where
5902  // possible. Lower it to a splat followed by an extract.
5903  SDLoc DL(Op);
5904  SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
5905  SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
5906  NewVal);
5907  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
5908  DAG.getConstant(0, DL, MVT::i32));
5909  }
5910 
5911  // The rest of our options are NEON-only; make sure that's allowed before
5912  // proceeding.
5913  if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
5914  return SDValue();
5915 
5916  EVT VMovVT;
5917  uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
5918 
5919  // It wouldn't really be worth bothering for doubles except for one very
5920  // important value, which does happen to match: 0.0. So make sure we don't do
5921  // anything stupid.
5922  if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
5923  return SDValue();
5924 
5925  // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
5926  SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
5927  VMovVT, false, VMOVModImm);
5928  if (NewVal != SDValue()) {
5929  SDLoc DL(Op);
5930  SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
5931  NewVal);
5932  if (IsDouble)
5933  return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5934 
5935  // It's a float: cast and extract a vector element.
5936  SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5937  VecConstant);
5938  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5939  DAG.getConstant(0, DL, MVT::i32));
5940  }
5941 
5942  // Finally, try a VMVN.i32
5943  NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
5944  false, VMVNModImm);
5945  if (NewVal != SDValue()) {
5946  SDLoc DL(Op);
5947  SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
5948 
5949  if (IsDouble)
5950  return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5951 
5952  // It's a float: cast and extract a vector element.
5953  SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5954  VecConstant);
5955  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5956  DAG.getConstant(0, DL, MVT::i32));
5957  }
5958 
5959  return SDValue();
5960 }
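// [Annotation, not in the original source] getFP32Imm/getFP64Imm succeed
// exactly for the VFPv3 8-bit immediates: +/-(16 + m)/16 * 2^e with
// m in [0,15] and e in [-3,4], i.e. magnitudes from 0.125 to 31.0. So 0.5,
// 1.0 and 31.0 stay as a plain ConstantFP above, while e.g. 0.1 must fall
// back to the VMOV/VMVN attempts or the default constant pool.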
5961 
5962 // Check whether a VEXT instruction can handle the shuffle mask when the
5963 // vector sources of the shuffle are the same.
5964 static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
5965  unsigned NumElts = VT.getVectorNumElements();
5966 
5967  // Assume that the first shuffle index is not UNDEF. Fail if it is.
5968  if (M[0] < 0)
5969  return false;
5970 
5971  Imm = M[0];
5972 
5973  // If this is a VEXT shuffle, the immediate value is the index of the first
5974  // element. The other shuffle indices must be the successive elements after
5975  // the first one.
5976  unsigned ExpectedElt = Imm;
5977  for (unsigned i = 1; i < NumElts; ++i) {
5978  // Increment the expected index. If it wraps around, just follow it
5979  // back to index zero and keep going.
5980  ++ExpectedElt;
5981  if (ExpectedElt == NumElts)
5982  ExpectedElt = 0;
5983 
5984  if (M[i] < 0) continue; // ignore UNDEF indices
5985  if (ExpectedElt != static_cast<unsigned>(M[i]))
5986  return false;
5987  }
5988 
5989  return true;
5990 }
5991 
5992 static bool isVEXTMask(ArrayRef<int> M, EVT VT,
5993  bool &ReverseVEXT, unsigned &Imm) {
5994  unsigned NumElts = VT.getVectorNumElements();
5995  ReverseVEXT = false;
5996 
5997  // Assume that the first shuffle index is not UNDEF. Fail if it is.
5998  if (M[0] < 0)
5999  return false;
6000 
6001  Imm = M[0];
6002 
6003  // If this is a VEXT shuffle, the immediate value is the index of the first
6004  // element. The other shuffle indices must be the successive elements after
6005  // the first one.
6006  unsigned ExpectedElt = Imm;
6007  for (unsigned i = 1; i < NumElts; ++i) {
6008  // Increment the expected index. If it wraps around, it may still be
6009  // a VEXT but the source vectors must be swapped.
6010  ExpectedElt += 1;
6011  if (ExpectedElt == NumElts * 2) {
6012  ExpectedElt = 0;
6013  ReverseVEXT = true;
6014  }
6015 
6016  if (M[i] < 0) continue; // ignore UNDEF indices
6017  if (ExpectedElt != static_cast<unsigned>(M[i]))
6018  return false;
6019  }
6020 
6021  // Adjust the index value if the source operands will be swapped.
6022  if (ReverseVEXT)
6023  Imm -= NumElts;
6024 
6025  return true;
6026 }
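// Worked example (annotation, not part of the LLVM source): for v4i32,
// mask <3,4,5,6> gives Imm = 3 with no wrap: VEXT(V1, V2, #3). Mask
// <7,0,1,2> starts at 7 and wraps past 2 * NumElts, so ReverseVEXT is set
// and Imm becomes 7 - 4 = 3: the caller swaps operands and emits
// VEXT(V2, V1, #3) = <V2[3], V1[0], V1[1], V1[2]>.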
6027 
6028 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
6029 /// instruction with the specified blocksize. (The order of the elements
6030 /// within each block of the vector is reversed.)
6031 static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
6032  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
6033  "Only possible block sizes for VREV are: 16, 32, 64");
6034 
6035  unsigned EltSz = VT.getScalarSizeInBits();
6036  if (EltSz == 64)
6037  return false;
6038 
6039  unsigned NumElts = VT.getVectorNumElements();
6040  unsigned BlockElts = M[0] + 1;
6041  // If the first shuffle index is UNDEF, be optimistic.
6042  if (M[0] < 0)
6043  BlockElts = BlockSize / EltSz;
6044 
6045  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
6046  return false;
6047 
6048  for (unsigned i = 0; i < NumElts; ++i) {
6049  if (M[i] < 0) continue; // ignore UNDEF indices
6050  if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
6051  return false;
6052  }
6053 
6054  return true;
6055 }
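// Worked example (annotation, not part of the LLVM source): for v8i8 with
// BlockSize == 32, BlockElts = 32 / 8 = 4 and the only accepted mask is
// <3,2,1,0,7,6,5,4> -- every 32-bit block's bytes reversed, which is
// exactly what VREV32.8 produces.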
6056 
6057 static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
6058  // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
6059  // range, then 0 is placed into the resulting vector. So pretty much any mask
6060  // of 8 elements can work here.
6061  return VT == MVT::v8i8 && M.size() == 8;
6062 }
6063 
6064 static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
6065  unsigned Index) {
6066  if (Mask.size() == Elements * 2)
6067  return Index / Elements;
6068  return Mask[Index] == 0 ? 0 : 1;
6069 }
6070 
6071 // Checks whether the shuffle mask represents a vector transpose (VTRN) by
6072 // checking that pairs of elements in the shuffle mask represent the same index
6073 // in each vector, incrementing the expected index by 2 at each step.
6074 // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
6075 // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
6076 // v2={e,f,g,h}
6077 // WhichResult gives the offset for each element in the mask based on which
6078 // of the two results it belongs to.
6079 //
6080 // The transpose can be represented either as:
6081 // result1 = shufflevector v1, v2, result1_shuffle_mask
6082 // result2 = shufflevector v1, v2, result2_shuffle_mask
6083 // where v1/v2 and the shuffle masks have the same number of elements
6084 // (here WhichResult (see below) indicates which result is being checked)
6085 //
6086 // or as:
6087 // results = shufflevector v1, v2, shuffle_mask
6088 // where both results are returned in one vector and the shuffle mask has twice
6089 // as many elements as v1/v2 (here WhichResult will always be 0 if true); here
6090 // we want to check the low half and high half of the shuffle mask as if it
6091 // were the other case.
6092 static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6093  unsigned EltSz = VT.getScalarSizeInBits();
6094  if (EltSz == 64)
6095  return false;
6096 
6097  unsigned NumElts = VT.getVectorNumElements();
6098  if (M.size() != NumElts && M.size() != NumElts*2)
6099  return false;
6100 
6101  // If the mask is twice as long as the input vector then we need to check the
6102  // upper and lower parts of the mask with a matching value for WhichResult
6103  // FIXME: A mask with only even values will be rejected in case the first
6104  // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
6105  // M[0] is used to determine WhichResult
6106  for (unsigned i = 0; i < M.size(); i += NumElts) {
6107  WhichResult = SelectPairHalf(NumElts, M, i);
6108  for (unsigned j = 0; j < NumElts; j += 2) {
6109  if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
6110  (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
6111  return false;
6112  }
6113  }
6114 
6115  if (M.size() == NumElts*2)
6116  WhichResult = 0;
6117 
6118  return true;
6119 }
6120 
6121 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
6122 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
6123 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
6124 static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
6125  unsigned EltSz = VT.getScalarSizeInBits();
6126  if (EltSz == 64)
6127  return false;
6128 
6129  unsigned NumElts = VT.getVectorNumElements();
6130  if (M.size() != NumElts && M.size() != NumElts*2)
6131  return false;
6132 
6133  for (unsigned i = 0; i < M.size(); i += NumElts) {
6134  WhichResult = SelectPairHalf(NumElts, M, i);
6135  for (unsigned j = 0; j < NumElts; j += 2) {
6136  if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
6137  (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
6138  return false;
6139  }
6140  }
6141 
6142  if (M.size() == NumElts*2)
6143  WhichResult = 0;
6144 
6145  return true;
6146 }
6147 
6148 // Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
6149 // that the mask elements are either all even and in steps of size 2 or all odd
6150 // and in steps of size 2.
6151 // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
6152 // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
6153 // v2={e,f,g,h}
6154 // Requires checks similar to those of isVTRNMask with
6155 // respect to how the results are returned.
6156 static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6157  unsigned EltSz = VT.getScalarSizeInBits();
6158  if (EltSz == 64)
6159  return false;
6160 
6161  unsigned NumElts = VT.getVectorNumElements();
6162  if (M.size() != NumElts && M.size() != NumElts*2)
6163  return false;
6164 
6165  for (unsigned i = 0; i < M.size(); i += NumElts) {
6166  WhichResult = SelectPairHalf(NumElts, M, i);
6167  for (unsigned j = 0; j < NumElts; ++j) {
6168  if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
6169  return false;
6170  }
6171  }
6172 
6173  if (M.size() == NumElts*2)
6174  WhichResult = 0;
6175 
6176  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6177  if (VT.is64BitVector() && EltSz == 32)
6178  return false;
6179 
6180  return true;
6181 }
6182 
6183 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
6184 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
6185 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
6186 static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
6187  unsigned EltSz = VT.getScalarSizeInBits();
6188  if (EltSz == 64)
6189  return false;
6190 
6191  unsigned NumElts = VT.getVectorNumElements();
6192  if (M.size() != NumElts && M.size() != NumElts*2)
6193  return false;
6194 
6195  unsigned Half = NumElts / 2;
6196  for (unsigned i = 0; i < M.size(); i += NumElts) {
6197  WhichResult = SelectPairHalf(NumElts, M, i);
6198  for (unsigned j = 0; j < NumElts; j += Half) {
6199  unsigned Idx = WhichResult;
6200  for (unsigned k = 0; k < Half; ++k) {
6201  int MIdx = M[i + j + k];
6202  if (MIdx >= 0 && (unsigned) MIdx != Idx)
6203  return false;
6204  Idx += 2;
6205  }
6206  }
6207  }
6208 
6209  if (M.size() == NumElts*2)
6210  WhichResult = 0;
6211 
6212  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6213  if (VT.is64BitVector() && EltSz == 32)
6214  return false;
6215 
6216  return true;
6217 }
6218 
6219 // Checks whether the shuffle mask represents a vector zip (VZIP) by checking
6220 // that pairs of elements of the shufflemask represent the same index in each
6221 // vector incrementing sequentially through the vectors.
6222 // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
6223 // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
6224 // v2={e,f,g,h}
6225 // Requires checks similar to those of isVTRNMask with respect to how the
6226 // results are returned.
6227 static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6228  unsigned EltSz = VT.getScalarSizeInBits();
6229  if (EltSz == 64)
6230  return false;
6231 
6232  unsigned NumElts = VT.getVectorNumElements();
6233  if (M.size() != NumElts && M.size() != NumElts*2)
6234  return false;
6235 
6236  for (unsigned i = 0; i < M.size(); i += NumElts) {
6237  WhichResult = SelectPairHalf(NumElts, M, i);
6238  unsigned Idx = WhichResult * NumElts / 2;
6239  for (unsigned j = 0; j < NumElts; j += 2) {
6240  if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
6241  (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
6242  return false;
6243  Idx += 1;
6244  }
6245  }
6246 
6247  if (M.size() == NumElts*2)
6248  WhichResult = 0;
6249 
6250  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6251  if (VT.is64BitVector() && EltSz == 32)
6252  return false;
6253 
6254  return true;
6255 }
6256 
6257 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
6258 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
6259 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
6260 static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
6261  unsigned EltSz = VT.getScalarSizeInBits();
6262  if (EltSz == 64)
6263  return false;
6264 
6265  unsigned NumElts = VT.getVectorNumElements();
6266  if (M.size() != NumElts && M.size() != NumElts*2)
6267  return false;
6268 
6269  for (unsigned i = 0; i < M.size(); i += NumElts) {
6270  WhichResult = SelectPairHalf(NumElts, M, i);
6271  unsigned Idx = WhichResult * NumElts / 2;
6272  for (unsigned j = 0; j < NumElts; j += 2) {
6273  if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
6274  (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
6275  return false;
6276  Idx += 1;
6277  }
6278  }
6279 
6280  if (M.size() == NumElts*2)
6281  WhichResult = 0;
6282 
6283  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6284  if (VT.is64BitVector() && EltSz == 32)
6285  return false;
6286 
6287  return true;
6288 }
6289 
6290 /// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
6291 /// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
6292 static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
6293  unsigned &WhichResult,
6294  bool &isV_UNDEF) {
6295  isV_UNDEF = false;
6296  if (isVTRNMask(ShuffleMask, VT, WhichResult))
6297  return ARMISD::VTRN;
6298  if (isVUZPMask(ShuffleMask, VT, WhichResult))
6299  return ARMISD::VUZP;
6300  if (isVZIPMask(ShuffleMask, VT, WhichResult))
6301  return ARMISD::VZIP;
6302 
6303  isV_UNDEF = true;
6304  if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
6305  return ARMISD::VTRN;
6306  if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
6307  return ARMISD::VUZP;
6308  if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
6309  return ARMISD::VZIP;
6310 
6311  return 0;
6312 }
6313 
6314 /// \return true if this is a reverse operation on a vector.
6315 static bool isReverseMask(ArrayRef<int> M, EVT VT) {
6316  unsigned NumElts = VT.getVectorNumElements();
6317  // Make sure the mask has the right size.
6318  if (NumElts != M.size())
6319  return false;
6320 
6321  // Look for <15, ..., 3, -1, 1, 0>.
6322  for (unsigned i = 0; i != NumElts; ++i)
6323  if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
6324  return false;
6325 
6326  return true;
6327 }
6328 
6329 // If N is an integer constant that can be moved into a register in one
6330 // instruction, return an SDValue of such a constant (will become a MOV
6331 // instruction). Otherwise return null.
6332 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
6333  const ARMSubtarget *ST, const SDLoc &dl) {
6334  uint64_t Val;
6335  if (!isa<ConstantSDNode>(N))
6336  return SDValue();
6337  Val = cast<ConstantSDNode>(N)->getZExtValue();
6338 
6339  if (ST->isThumb1Only()) {
6340  if (Val <= 255 || ~Val <= 255)
6341  return DAG.getConstant(Val, dl, MVT::i32);
6342  } else {
6343  if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
6344  return DAG.getConstant(Val, dl, MVT::i32);
6345  }
6346  return SDValue();
6347 }
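// A few illustrative constants (assuming the usual ARM so_imm rules; a
// sketch, not exhaustive):
//   Thumb1: 200        -> accepted, Val <= 255 (single MOVS)
//   ARM:    0x00AB0000 -> accepted, 0xAB rotated by an even amount (MOV)
//   ARM:    0xFFFFFF38 -> accepted, getSOImmVal(~Val) matches 0xC7 (MVN)
//   either: 0x12345678 -> rejected, so callers fall back to other lowerings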
6348 
6349 // If this is a case we can't handle, return null and let the default
6350 // expansion code take care of it.
6351 SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
6352  const ARMSubtarget *ST) const {
6353  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
6354  SDLoc dl(Op);
6355  EVT VT = Op.getValueType();
6356 
6357  APInt SplatBits, SplatUndef;
6358  unsigned SplatBitSize;
6359  bool HasAnyUndefs;
6360  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
6361  if (SplatUndef.isAllOnesValue())
6362  return DAG.getUNDEF(VT);
6363 
6364  if (SplatBitSize <= 64) {
6365  // Check if an immediate VMOV works.
6366  EVT VmovVT;
6367  SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
6368  SplatUndef.getZExtValue(), SplatBitSize,
6369  DAG, dl, VmovVT, VT.is128BitVector(),
6370  VMOVModImm);
6371  if (Val.getNode()) {
6372  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
6373  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6374  }
6375 
6376  // Try an immediate VMVN.
6377  uint64_t NegatedImm = (~SplatBits).getZExtValue();
6378  Val = isNEONModifiedImm(NegatedImm,
6379  SplatUndef.getZExtValue(), SplatBitSize,
6380  DAG, dl, VmovVT, VT.is128BitVector(),
6381  VMVNModImm);
6382  if (Val.getNode()) {
6383  SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
6384  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6385  }
6386 
6387  // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
6388  if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
6389  int ImmVal = ARM_AM::getFP32Imm(SplatBits);
6390  if (ImmVal != -1) {
6391  SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
6392  return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
6393  }
6394  }
6395  }
6396  }
6397 
6398  // Scan through the operands to see if only one value is used.
6399  //
6400  // As an optimisation, even if more than one value is used it may be more
6401  // profitable to splat with one value then change some lanes.
6402  //
6403  // Heuristically we decide to do this if the vector has a "dominant" value,
6404  // defined as splatted to more than half of the lanes.
6405  unsigned NumElts = VT.getVectorNumElements();
6406  bool isOnlyLowElement = true;
6407  bool usesOnlyOneValue = true;
6408  bool hasDominantValue = false;
6409  bool isConstant = true;
6410 
6411  // Map of the number of times a particular SDValue appears in the
6412  // element list.
6413  DenseMap<SDValue, unsigned> ValueCounts;
6414  SDValue Value;
6415  for (unsigned i = 0; i < NumElts; ++i) {
6416  SDValue V = Op.getOperand(i);
6417  if (V.isUndef())
6418  continue;
6419  if (i > 0)
6420  isOnlyLowElement = false;
6421  if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
6422  isConstant = false;
6423 
6424  ValueCounts.insert(std::make_pair(V, 0));
6425  unsigned &Count = ValueCounts[V];
6426 
6427  // Is this value dominant? (takes up more than half of the lanes)
6428  if (++Count > (NumElts / 2)) {
6429  hasDominantValue = true;
6430  Value = V;
6431  }
6432  }
6433  if (ValueCounts.size() != 1)
6434  usesOnlyOneValue = false;
6435  if (!Value.getNode() && !ValueCounts.empty())
6436  Value = ValueCounts.begin()->first;
6437 
6438  if (ValueCounts.empty())
6439  return DAG.getUNDEF(VT);
6440 
6441  // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
6442  // Keep going if we are hitting this case.
6443  if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
6444  return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
6445 
6446  unsigned EltSize = VT.getScalarSizeInBits();
6447 
6448  // Use VDUP for non-constant splats. For f32 constant splats, reduce to
6449  // i32 and try again.
6450  if (hasDominantValue && EltSize <= 32) {
6451  if (!isConstant) {
6452  SDValue N;
6453 
6454  // If we are VDUPing a value that comes directly from a vector, that will
6455  // cause an unnecessary move to and from a GPR, where instead we could
6456  // just use VDUPLANE. We can only do this if the lane being extracted
6457  // is at a constant index, as the VDUP from lane instructions only have
6458  // constant-index forms.
6459  ConstantSDNode *constIndex;
6460  if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6461  (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
6462  // We need to create a new undef vector to use for the VDUPLANE if the
6463  // size of the vector from which we get the value is different than the
6464  // size of the vector that we need to create. We will insert the element
6465  // such that the register coalescer will remove unnecessary copies.
6466  if (VT != Value->getOperand(0).getValueType()) {
6467  unsigned index = constIndex->getAPIntValue().getLimitedValue() %
6468  VT.getVectorNumElements();
6469  N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6470  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
6471  Value, DAG.getConstant(index, dl, MVT::i32)),
6472  DAG.getConstant(index, dl, MVT::i32));
6473  } else
6474  N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6475  Value->getOperand(0), Value->getOperand(1));
6476  } else
6477  N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
6478 
6479  if (!usesOnlyOneValue) {
6480  // The dominant value was splatted as 'N', but we now have to insert
6481  // all differing elements.
6482  for (unsigned I = 0; I < NumElts; ++I) {
6483  if (Op.getOperand(I) == Value)
6484  continue;
6485  SmallVector<SDValue, 3> Ops;
6486  Ops.push_back(N);
6487  Ops.push_back(Op.getOperand(I));
6488  Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
6489  N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
6490  }
6491  }
6492  return N;
6493  }
6494  if (VT.getVectorElementType().isFloatingPoint()) {
6495  SmallVector<SDValue, 8> Ops;
6496  for (unsigned i = 0; i < NumElts; ++i)
6497  Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
6498  Op.getOperand(i)));
6499  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
6500  SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
6501  Val = LowerBUILD_VECTOR(Val, DAG, ST);
6502  if (Val.getNode())
6503  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6504  }
6505  if (usesOnlyOneValue) {
6506  SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
6507  if (isConstant && Val.getNode())
6508  return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
6509  }
6510  }
6511 
6512  // If all elements are constants and the case above didn't get hit, fall back
6513  // to the default expansion, which will generate a load from the constant
6514  // pool.
6515  if (isConstant)
6516  return SDValue();
6517 
6518  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
6519  if (NumElts >= 4) {
6520  SDValue shuffle = ReconstructShuffle(Op, DAG);
6521  if (shuffle != SDValue())
6522  return shuffle;
6523  }
6524 
6525  if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
6526  // If we haven't found an efficient lowering, try splitting a 128-bit vector
6527  // into two 64-bit vectors; we might discover a better way to lower it.
6528  SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
6529  EVT ExtVT = VT.getVectorElementType();
6530  EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
6531  SDValue Lower =
6532  DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2));
6533  if (Lower.getOpcode() == ISD::BUILD_VECTOR)
6534  Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
6535  SDValue Upper = DAG.getBuildVector(
6536  HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2));
6537  if (Upper.getOpcode() == ISD::BUILD_VECTOR)
6538  Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
6539  if (Lower && Upper)
6540  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
6541  }
6542 
6543  // Vectors with 32- or 64-bit elements can be built by directly assigning
6544  // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
6545  // will be legalized.
6546  if (EltSize >= 32) {
6547  // Do the expansion with floating-point types, since that is what the VFP
6548  // registers are defined to use, and since i64 is not legal.
6549  EVT EltVT = EVT::getFloatingPointVT(EltSize);
6550  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
6551  SmallVector<SDValue, 8> Ops;
6552  for (unsigned i = 0; i < NumElts; ++i)
6553  Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
6554  SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
6555  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6556  }
6557 
6558  // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
6559  // know the default expansion would otherwise fall back on something even
6560  // worse. For a vector with one or two non-undef values, that's
6561  // scalar_to_vector for the elements followed by a shuffle (provided the
6562  // shuffle is valid for the target) and materialization element by element
6563  // on the stack followed by a load for everything else.
6564  if (!isConstant && !usesOnlyOneValue) {
6565  SDValue Vec = DAG.getUNDEF(VT);
6566  for (unsigned i = 0 ; i < NumElts; ++i) {
6567  SDValue V = Op.getOperand(i);
6568  if (V.isUndef())
6569  continue;
6570  SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
6571  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
6572  }
6573  return Vec;
6574  }
6575 
6576  return SDValue();
6577 }
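// Illustrative sketch (hypothetical input, not from this file): for
//   <4 x i32> <x, x, x, y>  with x and y non-constant,
// x is dominant (3 of 4 lanes > NumElts/2), so the code above emits
//   t = ARMISD::VDUP(x);  result = INSERT_VECTOR_ELT(t, y, 3)
// rather than inserting all four lanes individually.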
6578 
6579 // Gather data to see if the operation can be modelled as a
6580 // shuffle in combination with VEXTs.
6581 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
6582  SelectionDAG &DAG) const {
6583  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
6584  SDLoc dl(Op);
6585  EVT VT = Op.getValueType();
6586  unsigned NumElts = VT.getVectorNumElements();
6587 
6588  struct ShuffleSourceInfo {
6589  SDValue Vec;
6590  unsigned MinElt = std::numeric_limits<unsigned>::max();
6591  unsigned MaxElt = 0;
6592 
6593  // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
6594  // be compatible with the shuffle we intend to construct. As a result
6595  // ShuffleVec will be some sliding window into the original Vec.
6596  SDValue ShuffleVec;
6597 
6598  // Code should guarantee that element i in Vec starts at element
6599  // "WindowBase + i * WindowScale" in ShuffleVec.
6600  int WindowBase = 0;
6601  int WindowScale = 1;
6602 
6603  ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
6604 
6605  bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
6606  };
6607 
6608  // First gather all vectors used as an immediate source for this BUILD_VECTOR
6609  // node.
6610  SmallVector<ShuffleSourceInfo, 2> Sources;
6611  for (unsigned i = 0; i < NumElts; ++i) {
6612  SDValue V = Op.getOperand(i);
6613  if (V.isUndef())
6614  continue;
6615  else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
6616  // A shuffle can only come from building a vector from various
6617  // elements of other vectors.
6618  return SDValue();
6619  } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
6620  // Furthermore, shuffles require a constant mask, whereas extractelts
6621  // accept variable indices.
6622  return SDValue();
6623  }
6624 
6625  // Add this element source to the list if it's not already there.
6626  SDValue SourceVec = V.getOperand(0);
6627  auto Source = llvm::find(Sources, SourceVec);
6628  if (Source == Sources.end())
6629  Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
6630 
6631  // Update the minimum and maximum lane number seen.
6632  unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
6633  Source->MinElt = std::min(Source->MinElt, EltNo);
6634  Source->MaxElt = std::max(Source->MaxElt, EltNo);
6635  }
6636 
6637  // Currently only do something sane when at most two source vectors
6638  // are involved.
6639  if (Sources.size() > 2)
6640  return SDValue();
6641 
6642  // Find out the smallest element size among result and two sources, and use
6643  // it as element size to build the shuffle_vector.
6644  EVT SmallestEltTy = VT.getVectorElementType();
6645  for (auto &Source : Sources) {
6646  EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
6647  if (SrcEltTy.bitsLT(SmallestEltTy))
6648  SmallestEltTy = SrcEltTy;
6649  }
6650  unsigned ResMultiplier =
6651  VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
6652  NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
6653  EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
6654 
6655  // If the source vector is too wide or too narrow, we may nevertheless be able
6656  // to construct a compatible shuffle either by concatenating it with UNDEF or
6657  // extracting a suitable range of elements.
6658  for (auto &Src : Sources) {
6659  EVT SrcVT = Src.ShuffleVec.getValueType();
6660 
6661  if (SrcVT.getSizeInBits() == VT.getSizeInBits())
6662  continue;
6663 
6664  // This stage of the search produces a source with the same element type as
6665  // the original, but with a total width matching the BUILD_VECTOR output.
6666  EVT EltVT = SrcVT.getVectorElementType();
6667  unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
6668  EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
6669 
6670  if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
6671  if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
6672  return SDValue();
6673  // We can pad out the smaller vector for free, so if it's part of a
6674  // shuffle...
6675  Src.ShuffleVec =
6676  DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
6677  DAG.getUNDEF(Src.ShuffleVec.getValueType()));
6678  continue;
6679  }
6680 
6681  if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
6682  return SDValue();
6683 
6684  if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
6685  // Span too large for a VEXT to cope
6686  return SDValue();
6687  }
6688 
6689  if (Src.MinElt >= NumSrcElts) {
6690  // The extraction can just take the second half
6691  Src.ShuffleVec =
6692  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6693  DAG.getConstant(NumSrcElts, dl, MVT::i32));
6694  Src.WindowBase = -NumSrcElts;
6695  } else if (Src.MaxElt < NumSrcElts) {
6696  // The extraction can just take the first half
6697  Src.ShuffleVec =
6698  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6699  DAG.getConstant(0, dl, MVT::i32));
6700  } else {
6701  // An actual VEXT is needed
6702  SDValue VEXTSrc1 =
6703  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6704  DAG.getConstant(0, dl, MVT::i32));
6705  SDValue VEXTSrc2 =
6706  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6707  DAG.getConstant(NumSrcElts, dl, MVT::i32));
6708 
6709  Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
6710  VEXTSrc2,
6711  DAG.getConstant(Src.MinElt, dl, MVT::i32));
6712  Src.WindowBase = -Src.MinElt;
6713  }
6714  }
6715 
6716  // Another possible incompatibility occurs from the vector element types. We
6717  // can fix this by bitcasting the source vectors to the same type we intend
6718  // for the shuffle.
6719  for (auto &Src : Sources) {
6720  EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
6721  if (SrcEltTy == SmallestEltTy)
6722  continue;
6723  assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
6724  Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
6725  Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
6726  Src.WindowBase *= Src.WindowScale;
6727  }
6728 
6729  // Final sanity check before we try to actually produce a shuffle.
6730  LLVM_DEBUG(for (auto Src
6731  : Sources)
6732  assert(Src.ShuffleVec.getValueType() == ShuffleVT););
6733 
6734  // The stars all align, our next step is to produce the mask for the shuffle.
6735  SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
6736  int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
6737  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
6738  SDValue Entry = Op.getOperand(i);
6739  if (Entry.isUndef())
6740  continue;
6741 
6742  auto Src = llvm::find(Sources, Entry.getOperand(0));
6743  int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
6744 
6745  // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
6746  // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
6747  // segment.
6748  EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
6749  int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
6750  VT.getScalarSizeInBits());
6751  int LanesDefined = BitsDefined / BitsPerShuffleLane;
6752 
6753  // This source is expected to fill ResMultiplier lanes of the final shuffle,
6754  // starting at the appropriate offset.
6755  int *LaneMask = &Mask[i * ResMultiplier];
6756 
6757  int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
6758  ExtractBase += NumElts * (Src - Sources.begin());
6759  for (int j = 0; j < LanesDefined; ++j)
6760  LaneMask[j] = ExtractBase + j;
6761  }
6762 
6763  // Final check before we try to produce nonsense...
6764  if (!isShuffleMaskLegal(Mask, ShuffleVT))
6765  return SDValue();
6766 
6767  // We can't handle more than two sources. This should have already
6768  // been checked before this point.
6769  assert(Sources.size() <= 2 && "Too many sources!");
6770 
6771  SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
6772  for (unsigned i = 0; i < Sources.size(); ++i)
6773  ShuffleOps[i] = Sources[i].ShuffleVec;
6774 
6775  SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
6776  ShuffleOps[1], Mask);
6777  return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
6778 }
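// Worked example (assumed values, not from this file): if every lane is
// extractelt(S, MinElt + i) from a double-width source S, the VEXT above
// re-bases the window so WindowBase becomes -MinElt. If S is then bitcast
// from i32 elements to the i16 shuffle type, WindowScale is 2 and source
// element e maps to shuffle lane e * 2 + WindowBase (WindowBase having
// been rescaled by the same factor).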
6779 
6780 /// isShuffleMaskLegal - Targets can use this to indicate that they only
6781 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
6782 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
6783 /// are assumed to be legal.
6784 bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
6785  if (VT.getVectorNumElements() == 4 &&
6786  (VT.is128BitVector() || VT.is64BitVector())) {
6787  unsigned PFIndexes[4];
6788  for (unsigned i = 0; i != 4; ++i) {
6789  if (M[i] < 0)
6790  PFIndexes[i] = 8;
6791  else
6792  PFIndexes[i] = M[i];
6793  }
6794 
6795  // Compute the index in the perfect shuffle table.
6796  unsigned PFTableIndex =
6797  PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
6798  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6799  unsigned Cost = (PFEntry >> 30);
6800 
6801  if (Cost <= 4)
6802  return true;
6803  }
6804 
6805  bool ReverseVEXT, isV_UNDEF;
6806  unsigned Imm, WhichResult;
6807 
6808  unsigned EltSize = VT.getScalarSizeInBits();
6809  return (EltSize >= 32 ||
6810  ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
6811  isVREVMask(M, VT, 64) ||
6812  isVREVMask(M, VT, 32) ||
6813  isVREVMask(M, VT, 16) ||
6814  isVEXTMask(M, VT, ReverseVEXT, Imm) ||
6815  isVTBLMask(M, VT) ||
6816  isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) ||
6817  ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
6818 }
6819 
6820 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
6821 /// the specified operations to build the shuffle.
6822 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
6823  SDValue RHS, SelectionDAG &DAG,
6824  const SDLoc &dl) {
6825  unsigned OpNum = (PFEntry >> 26) & 0x0F;
6826  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
6827  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
6828 
6829  enum {
6830  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
6831  OP_VREV,
6832  OP_VDUP0,
6833  OP_VDUP1,
6834  OP_VDUP2,
6835  OP_VDUP3,
6836  OP_VEXT1,
6837  OP_VEXT2,
6838  OP_VEXT3,
6839  OP_VUZPL, // VUZP, left result
6840  OP_VUZPR, // VUZP, right result
6841  OP_VZIPL, // VZIP, left result
6842  OP_VZIPR, // VZIP, right result
6843  OP_VTRNL, // VTRN, left result
6844  OP_VTRNR // VTRN, right result
6845  };
6846 
6847  if (OpNum == OP_COPY) {
6848  if (LHSID == (1*9+2)*9+3) return LHS;
6849  assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
6850  return RHS;
6851  }
6852 
6853  SDValue OpLHS, OpRHS;
6854  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
6855  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
6856  EVT VT = OpLHS.getValueType();
6857 
6858  switch (OpNum) {
6859  default: llvm_unreachable("Unknown shuffle opcode!");
6860  case OP_VREV:
6861  // VREV divides the vector in half and swaps within the half.
6862  if (VT.getVectorElementType() == MVT::i32 ||
6863  VT.getVectorElementType() == MVT::f32)
6864  return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
6865  // vrev <4 x i16> -> VREV32
6866  if (VT.getVectorElementType() == MVT::i16)
6867  return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
6868  // vrev <4 x i8> -> VREV16
6869  assert(VT.getVectorElementType() == MVT::i8);
6870  return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
6871  case OP_VDUP0:
6872  case OP_VDUP1:
6873  case OP_VDUP2:
6874  case OP_VDUP3:
6875  return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6876  OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
6877  case OP_VEXT1:
6878  case OP_VEXT2:
6879  case OP_VEXT3:
6880  return DAG.getNode(ARMISD::VEXT, dl, VT,
6881  OpLHS, OpRHS,
6882  DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
6883  case OP_VUZPL:
6884  case OP_VUZPR:
6885  return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
6886  OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
6887  case OP_VZIPL:
6888  case OP_VZIPR:
6889  return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
6890  OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
6891  case OP_VTRNL:
6892  case OP_VTRNR:
6893  return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
6894  OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
6895  }
6896 }
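// A minimal decoding sketch (field layout inferred from the shifts above;
// decodePFEntry is not an LLVM API): each PerfectShuffleTable entry packs
// the cost in bits 31-30, an opcode in bits 29-26, and two 13-bit ids.
struct PFEntryFields { unsigned Cost, OpNum, LHSID, RHSID; };
static inline PFEntryFields decodePFEntry(unsigned PFEntry) {
  return {PFEntry >> 30, (PFEntry >> 26) & 0x0F,
          (PFEntry >> 13) & ((1u << 13) - 1), PFEntry & ((1u << 13) - 1)};
}
// A 4-lane mask <m0,m1,m2,m3>, with 8 encoding an undef lane, indexes the
// table in base 9: PFTableIndex = ((m0 * 9 + m1) * 9 + m2) * 9 + m3.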
6897 
6898 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
6899  ArrayRef<int> ShuffleMask,
6900  SelectionDAG &DAG) {
6901  // Check to see if we can use the VTBL instruction.
6902  SDValue V1 = Op.getOperand(0);
6903  SDValue V2 = Op.getOperand(1);
6904  SDLoc DL(Op);
6905 
6906  SmallVector<SDValue, 8> VTBLMask;
6907  for (ArrayRef<int>::iterator
6908  I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
6909  VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
6910 
6911  if (V2.getNode()->isUndef())
6912  return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
6913  DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6914 
6915  return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
6916  DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6917 }
6918 
6919 static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
6920  SelectionDAG &DAG) {
6921  SDLoc DL(Op);
6922  SDValue OpLHS = Op.getOperand(0);
6923  EVT VT = OpLHS.getValueType();
6924 
6925  assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
6926  "Expect an v8i16/v16i8 type");
6927  OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
6928  // For a v16i8 type: after the VREV64 we have <7, ..., 0, 15, ..., 8>.
6929  // The VEXT by 8 bytes then moves the last 8 bytes in front of the first
6930  // 8, producing the fully reversed vector. The v8i16 case is similar.
6931  unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
6932  return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
6933  DAG.getConstant(ExtractNum, DL, MVT::i32));
6934 }
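// Worked example for v16i8 (sketch): starting from lanes <0, ..., 15>,
// VREV64 gives R = <7, ..., 0, 15, ..., 8>; VEXT #8 over (R, R) then takes
// bytes 8..15 of R followed by bytes 0..7, i.e. the full reverse
// <15, ..., 0>.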
6935 
6936 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
6937  SDValue V1 = Op.getOperand(0);
6938  SDValue V2 = Op.getOperand(1);
6939  SDLoc dl(Op);
6940  EVT VT = Op.getValueType();
6941  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
6942 
6943  // Convert shuffles that are directly supported on NEON to target-specific
6944  // DAG nodes, instead of keeping them as shuffles and matching them again
6945  // during code selection. This is more efficient and avoids the possibility
6946  // of inconsistencies between legalization and selection.
6947  // FIXME: floating-point vectors should be canonicalized to integer vectors
6948  // of the same size so that they get CSEd properly.
6949  ArrayRef<int> ShuffleMask = SVN->getMask();
6950 
6951  unsigned EltSize = VT.getScalarSizeInBits();
6952  if (EltSize <= 32) {
6953  if (SVN->isSplat()) {
6954  int Lane = SVN->getSplatIndex();
6955  // If this is undef splat, generate it via "just" vdup, if possible.
6956  if (Lane == -1) Lane = 0;
6957 
6958  // Test if V1 is a SCALAR_TO_VECTOR.
6959  if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
6960  return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6961  }
6962  // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
6963  // (and probably will turn into a SCALAR_TO_VECTOR once legalization
6964  // reaches it).
6965  if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
6966  !isa<ConstantSDNode>(V1.getOperand(0))) {
6967  bool IsScalarToVector = true;
6968  for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
6969  if (!V1.getOperand(i).isUndef()) {
6970  IsScalarToVector = false;
6971  break;
6972  }
6973  if (IsScalarToVector)
6974  return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6975  }
6976  return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
6977  DAG.getConstant(Lane, dl, MVT::i32));
6978  }
6979 
6980  bool ReverseVEXT;
6981  unsigned Imm;
6982  if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
6983  if (ReverseVEXT)
6984  std::swap(V1, V2);
6985  return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
6986  DAG.getConstant(Imm, dl, MVT::i32));
6987  }
6988 
6989  if (isVREVMask(ShuffleMask, VT, 64))
6990  return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
6991  if (isVREVMask(ShuffleMask, VT, 32))
6992  return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
6993  if (isVREVMask(ShuffleMask, VT, 16))
6994  return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
6995 
6996  if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
6997  return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
6998  DAG.getConstant(Imm, dl, MVT::i32));
6999  }
7000 
7001  // Check for Neon shuffles that modify both input vectors in place.
7002  // If both results are used, i.e., if there are two shuffles with the same
7003  // source operands and with masks corresponding to both results of one of
7004  // these operations, DAG memoization will ensure that a single node is
7005  // used for both shuffles.
7006  unsigned WhichResult;
7007  bool isV_UNDEF;
7008  if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
7009  ShuffleMask, VT, WhichResult, isV_UNDEF)) {
7010  if (isV_UNDEF)
7011  V2 = V1;
7012  return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
7013  .getValue(WhichResult);
7014  }
7015 
7016  // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
7017  // shuffles that produce a result larger than their operands with:
7018  // shuffle(concat(v1, undef), concat(v2, undef))
7019  // ->
7020  // shuffle(concat(v1, v2), undef)
7021  // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
7022  //
7023  // This is useful in the general case, but there are special cases where
7024  // native shuffles produce larger results: the two-result ops.
7025  //
7026  // Look through the concat when lowering them:
7027  // shuffle(concat(v1, v2), undef)
7028  // ->
7029  // concat(VZIP(v1, v2):0, :1)
7030  //
7031  if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
7032  SDValue SubV1 = V1->getOperand(0);
7033  SDValue SubV2 = V1->getOperand(1);
7034  EVT SubVT = SubV1.getValueType();
7035 
7036  // We expect these to have been canonicalized to -1.
7037  assert(llvm::all_of(ShuffleMask, [&](int i) {
7038  return i < (int)VT.getVectorNumElements();
7039  }) && "Unexpected shuffle index into UNDEF operand!");
7040 
7041  if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
7042  ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
7043  if (isV_UNDEF)
7044  SubV2 = SubV1;
7045  assert((WhichResult == 0) &&
7046  "In-place shuffle of concat can only have one result!");
7047  SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
7048  SubV1, SubV2);
7049  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
7050  Res.getValue(1));
7051  }
7052  }
7053  }
7054 
7055  // If the shuffle is not directly supported and it has 4 elements, use
7056  // the PerfectShuffle-generated table to synthesize it from other shuffles.
7057  unsigned NumElts = VT.getVectorNumElements();
7058  if (NumElts == 4) {
7059  unsigned PFIndexes[4];
7060  for (unsigned i = 0; i != 4; ++i) {
7061  if (ShuffleMask[i] < 0)
7062  PFIndexes[i] = 8;
7063  else
7064  PFIndexes[i] = ShuffleMask[i];
7065  }
7066 
7067  // Compute the index in the perfect shuffle table.
7068  unsigned PFTableIndex =
7069  PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
7070  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
7071  unsigned Cost = (PFEntry >> 30);
7072 
7073  if (Cost <= 4)
7074  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
7075  }
7076 
7077  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
7078  if (EltSize >= 32) {
7079  // Do the expansion with floating-point types, since that is what the VFP
7080  // registers are defined to use, and since i64 is not legal.
7081  EVT EltVT = EVT::getFloatingPointVT(EltSize);
7082  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
7083  V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
7084  V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
7085  SmallVector<SDValue, 8> Ops;
7086  for (unsigned i = 0; i < NumElts; ++i) {
7087  if (ShuffleMask[i] < 0)
7088  Ops.push_back(DAG.getUNDEF(EltVT));
7089  else
7090  Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
7091  ShuffleMask[i] < (int)NumElts ? V1 : V2,
7092  DAG.getConstant(ShuffleMask[i] & (NumElts-1),
7093  dl, MVT::i32)));
7094  }
7095  SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
7096  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
7097  }
7098 
7099  if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
7100  return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
7101 
7102  if (VT == MVT::v8i8)
7103  if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
7104  return NewOp;
7105 
7106  return SDValue();
7107 }
7108 
7109 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
7110  // INSERT_VECTOR_ELT is legal only for immediate indexes.
7111  SDValue Lane = Op.getOperand(2);
7112  if (!isa<ConstantSDNode>(Lane))
7113  return SDValue();
7114 
7115  return Op;
7116 }
7117 
7118 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
7119  // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
7120  SDValue Lane = Op.getOperand(1);
7121  if (!isa<ConstantSDNode>(Lane))
7122  return SDValue();
7123 
7124  SDValue Vec = Op.getOperand(0);
7125  if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
7126  SDLoc dl(Op);
7127  return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
7128  }
7129 
7130  return Op;
7131 }
7132 
7133 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
7134  // The only time a CONCAT_VECTORS operation can have legal types is when
7135  // two 64-bit vectors are concatenated to a 128-bit vector.
7136  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
7137  "unexpected CONCAT_VECTORS");
7138  SDLoc dl(Op);
7139  SDValue Val = DAG.getUNDEF(MVT::v2f64);
7140  SDValue Op0 = Op.getOperand(0);
7141  SDValue Op1 = Op.getOperand(1);
7142  if (!Op0.isUndef())
7143  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
7144  DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
7145  DAG.getIntPtrConstant(0, dl));
7146  if (!Op1.isUndef())
7147  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
7148  DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
7149  DAG.getIntPtrConstant(1, dl));
7150  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
7151 }
7152 
7153 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
7154 /// element has been zero/sign-extended, depending on the isSigned parameter,
7155 /// from an integer type half its size.
7156 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
7157  bool isSigned) {
7158  // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
7159  EVT VT = N->getValueType(0);
7160  if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
7161  SDNode *BVN = N->getOperand(0).getNode();
7162  if (BVN->getValueType(0) != MVT::v4i32 ||
7163  BVN->getOpcode() != ISD::BUILD_VECTOR)
7164  return false;
7165  unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
7166  unsigned HiElt = 1 - LoElt;
7167  ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
7168  ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
7169  ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
7170  ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
7171  if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
7172  return false;
7173  if (isSigned) {
7174  if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
7175  Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
7176  return true;
7177  } else {
7178  if (Hi0->isNullValue() && Hi1->isNullValue())
7179  return true;
7180  }
7181  return false;
7182  }
7183 
7184  if (N->getOpcode() != ISD::BUILD_VECTOR)
7185  return false;
7186 
7187  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
7188  SDNode *Elt = N->getOperand(i).getNode();
7189  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
7190  unsigned EltSize = VT.getScalarSizeInBits();
7191  unsigned HalfSize = EltSize / 2;
7192  if (isSigned) {
7193  if (!isIntN(HalfSize, C->getSExtValue()))
7194  return false;
7195  } else {
7196  if (!isUIntN(HalfSize, C->getZExtValue()))
7197  return false;
7198  }
7199  continue;
7200  }
7201  return false;
7202  }
7203 
7204  return true;
7205 }
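// For instance (hypothetical constants): <4 x i16> <100, -3, 7, -128>
// satisfies the isSigned check above because every element fits in i8, so
// a multiply of two such vectors can be rebuilt on truncated elements and
// selected as VMULL.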
7206 
7207 /// isSignExtended - Check if a node is a vector value that is sign-extended
7208 /// or a constant BUILD_VECTOR with sign-extended elements.
7209 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
7210  if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
7211  return true;
7212  if (isExtendedBUILD_VECTOR(N, DAG, true))
7213  return true;
7214  return false;
7215 }
7216 
7217 /// isZeroExtended - Check if a node is a vector value that is zero-extended
7218 /// or a constant BUILD_VECTOR with zero-extended elements.
7219 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
7220  if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
7221  return true;
7222  if (isExtendedBUILD_VECTOR(N, DAG, false))
7223  return true;
7224  return false;
7225 }
7226 
7227 static EVT getExtensionTo64Bits(const EVT &OrigVT) {
7228  if (OrigVT.getSizeInBits() >= 64)
7229  return OrigVT;
7230 
7231  assert(OrigVT.isSimple() && "Expecting a simple value type");
7232 
7233  MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
7234  switch (OrigSimpleTy) {
7235  default: llvm_unreachable("Unexpected Vector Type");
7236  case MVT::v2i8:
7237  case MVT::v2i16:
7238  return MVT::v2i32;
7239  case MVT::v4i8:
7240  return MVT::v4i16;
7241  }
7242 }
7243 
7244 /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
7245 /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
7246 /// We insert the required extension here to get the vector to fill a D register.
7247 static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
7248  const EVT &OrigTy,
7249  const EVT &ExtTy,
7250  unsigned ExtOpcode) {
7251  // The vector originally had a size of OrigTy. It was then extended to ExtTy.
7252  // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
7253  // 64-bits we need to insert a new extension so that it will be 64-bits.
7254  assert(ExtTy.is128BitVector() && "Unexpected extension size");
7255  if (OrigTy.getSizeInBits() >= 64)
7256  return N;
7257 
7258  // Must extend size to at least 64 bits to be used as an operand for VMULL.
7259  EVT NewVT = getExtensionTo64Bits(OrigTy);
7260 
7261  return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
7262 }
7263 
7264 /// SkipLoadExtensionForVMULL - return a load of the original vector size that
7265 /// does not do any sign/zero extension. If the original vector is less
7266 /// than 64 bits, an appropriate extension will be added after the load to
7267 /// reach a total size of 64 bits. We have to add the extension separately
7268 /// because ARM does not have a sign/zero extending load for vectors.
7269 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG) {
7270  EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
7271 
7272  // The load already has the right type.
7273  if (ExtendedTy == LD->getMemoryVT())
7274  return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
7275  LD->getBasePtr(), LD->getPointerInfo(),
7276  LD->getAlignment(), LD->getMemOperand()->getFlags());
7277 
7278  // We need to create a zextload/sextload. We cannot just create a load
7279  // followed by a sext/zext node because LowerMUL is also run during normal
7280  // operation legalization where we can't create illegal types.
7281  return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
7282  LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
7283  LD->getMemoryVT(), LD->getAlignment(),
7284  LD->getMemOperand()->getFlags());
7285 }
7286 
7287 /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
7288 /// extending load, or BUILD_VECTOR with extended elements, return the
7289 /// unextended value. The unextended vector should be 64 bits so that it can
7290 /// be used as an operand to a VMULL instruction. If the original vector size
7291 /// before extension is less than 64 bits we add an extension to resize
7292 /// the vector to 64 bits.
7293 static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
7294  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
7295  return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
7296  N->getOperand(0)->getValueType(0),
7297  N->getValueType(0),
7298  N->getOpcode());
7299 
7300  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
7301  assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
7302  "Expected extending load");
7303 
7304  SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
7305  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
7306  unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
7307  SDValue extLoad =
7308  DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
7309  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
7310 
7311  return newLoad;
7312  }
7313 
7314  // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
7315  // have been legalized as a BITCAST from v4i32.
7316  if (N->getOpcode() == ISD::BITCAST) {
7317  SDNode *BVN = N->getOperand(0).getNode();
7318  assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
7319  BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
7320  unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
7321  return DAG.getBuildVector(
7322  MVT::v2i32, SDLoc(N),
7323  {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
7324  }
7325  // Construct a new BUILD_VECTOR with elements truncated to half the size.
7326  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
7327  EVT VT = N->getValueType(0);
7328  unsigned EltSize = VT.getScalarSizeInBits() / 2;
7329  unsigned NumElts = VT.getVectorNumElements();
7330  MVT TruncVT = MVT::getIntegerVT(EltSize);
7331  SmallVector<SDValue, 8> Ops;
7332  SDLoc dl(N);
7333  for (unsigned i = 0; i != NumElts; ++i) {
7334  ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
7335  const APInt &CInt = C->getAPIntValue();
7336  // Element types smaller than 32 bits are not legal, so use i32 elements.
7337  // The values are implicitly truncated so sext vs. zext doesn't matter.
7338  Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
7339  }
7340  return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
7341 }
7342 
7343 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
7344  unsigned Opcode = N->getOpcode();
7345  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7346  SDNode *N0 = N->getOperand(0).getNode();
7347  SDNode *N1 = N->getOperand(1).getNode();
7348  return N0->hasOneUse() && N1->hasOneUse() &&
7349  isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
7350  }
7351  return false;
7352 }
7353 
7354 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
7355  unsigned Opcode = N->getOpcode();
7356  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7357  SDNode *N0 = N->getOperand(0).getNode();
7358  SDNode *N1 = N->getOperand(1).getNode();
7359  return N0->hasOneUse() && N1->hasOneUse() &&
7360  isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
7361  }
7362  return false;
7363 }
7364 
7365 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
7366  // Multiplications are only custom-lowered for 128-bit vectors so that
7367  // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
7368  EVT VT = Op.getValueType();
7369  assert(VT.is128BitVector() && VT.isInteger() &&
7370  "unexpected type for custom-lowering ISD::MUL");
7371  SDNode *N0 = Op.getOperand(0).getNode();
7372  SDNode *N1 = Op.getOperand(1).getNode();
7373  unsigned NewOpc = 0;
7374  bool isMLA = false;
7375  bool isN0SExt = isSignExtended(N0, DAG);
7376  bool isN1SExt = isSignExtended(N1, DAG);
7377  if (isN0SExt && isN1SExt)
7378  NewOpc = ARMISD::VMULLs;
7379  else {
7380  bool isN0ZExt = isZeroExtended(N0, DAG);
7381  bool isN1ZExt = isZeroExtended(N1, DAG);
7382  if (isN0ZExt && isN1ZExt)
7383  NewOpc = ARMISD::VMULLu;
7384  else if (isN1SExt || isN1ZExt) {
7385  // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
7386  // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
7387  if (isN1SExt && isAddSubSExt(N0, DAG)) {
7388  NewOpc = ARMISD::VMULLs;
7389  isMLA = true;
7390  } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
7391  NewOpc = ARMISD::VMULLu;
7392  isMLA = true;
7393  } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
7394  std::swap(N0, N1);
7395  NewOpc = ARMISD::VMULLu;
7396  isMLA = true;
7397  }
7398  }
7399 
7400  if (!NewOpc) {
7401  if (VT == MVT::v2i64)
7402  // Fall through to expand this. It is not legal.
7403  return SDValue();
7404  else
7405  // Other vector multiplications are legal.
7406  return Op;
7407  }
7408  }
7409 
7410  // Legalize to a VMULL instruction.
7411  SDLoc DL(Op);
7412  SDValue Op0;
7413  SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
7414  if (!isMLA) {
7415  Op0 = SkipExtensionForVMULL(N0, DAG);
7416  assert(Op0.getValueType().is64BitVector() &&
7417  Op1.getValueType().is64BitVector() &&
7418  "unexpected types for extended operands to VMULL");
7419  return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
7420  }
7421 
7422  // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
7423  // isel lowering to take advantage of no-stall back to back vmul + vmla.
7424  // vmull q0, d4, d6
7425  // vmlal q0, d5, d6
7426  // is faster than
7427  // vaddl q0, d4, d5
7428  // vmovl q1, d6
7429  // vmul q0, q0, q1
7430  SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
7431  SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
7432  EVT Op1VT = Op1.getValueType();
7433  return DAG.getNode(N0->getOpcode(), DL, VT,
7434  DAG.getNode(NewOpc, DL, VT,
7435  DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
7436  DAG.getNode(NewOpc, DL, VT,
7437  DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
7438 }
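// Illustrative input pattern (a sketch, not from this file):
//   %wa = sext <8 x i8> %a to <8 x i16>
//   %wb = sext <8 x i8> %b to <8 x i16>
//   %p  = mul <8 x i16> %wa, %wb
// LowerMUL drops both extensions via SkipExtensionForVMULL and emits
// ARMISD::VMULLs (vmull.s8); the isMLA path instead distributes the
// multiply over an extended add/sub to chain vmull + vmlal.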
7439 
7440 static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
7441  SelectionDAG &DAG) {
7442  // TODO: Should this propagate fast-math-flags?
7443 
7444  // Convert to float
7445  // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
7446  // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
7447  X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
7448  Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
7449  X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
7450  Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
7451  // Get reciprocal estimate.
7452  // float4 recip = vrecpeq_f32(yf);
7453  Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7454  DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7455  Y);
7456  // Because char has a smaller range than uchar, we can actually get away
7457  // without any newton steps. This requires that we use a weird bias
7458  // of 0xb000, however (again, this has been exhaustively tested).
7459  // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
7460  X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
7461  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
7462  Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
7463  X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
7464  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
7465  // Convert back to short.
7466  X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
7467  X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
7468  return X;
7469 }
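// A standalone scalar model of the bias trick above (a sketch: it uses an
// exact 1.0f/y in place of the vrecpe estimate, so the 0xb000 bias has
// more slack here than in the vector code; biasedRecipDiv is hypothetical).
#include <cstdint>
#include <cstring>
static int8_t biasedRecipDiv(int8_t x, int8_t y) {
  float q = (float)x * (1.0f / (float)y);  // xf * recip
  uint32_t bits;
  std::memcpy(&bits, &q, sizeof(bits));
  bits += 0xb000;                          // nudge |q| up by 0xb000 ulps
  std::memcpy(&q, &bits, sizeof(q));
  return (int8_t)q;                        // FP_TO_SINT truncates toward zero
}
// Comparing against C's truncating x / y for all x, y in [-128, 127] with
// y != 0 (skipping the -128 / -1 overflow case) appears to agree: the
// fractional part of any inexact quotient is at least 1/|y|, while the
// bias stays below 0.69/|y|, so the nudge never crosses an integer.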
7470 
7471 static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
7472  SelectionDAG &DAG) {
7473  // TODO: Should this propagate fast-math-flags?
7474 
7475  SDValue N2;
7476  // Convert to float.
7477  // float4 yf = vcvt_f32_s32(vmovl_s16(y));
7478  // float4 xf = vcvt_f32_s32(vmovl_s16(x));
7479  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
7480  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
7481  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7482  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7483 
7484  // Use reciprocal estimate and one refinement step.
7485  // float4 recip = vrecpeq_f32(yf);
7486  // recip *= vrecpsq_f32(yf, recip);
7487  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7488  DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7489  N1);
7490  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7491  DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7492  N1, N2);
7493  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7494  // Because short has a smaller range than ushort, we can actually get away
7495  // with only a single newton step. This requires that we use a weird bias
7496  // of 0x89, however (again, this has been exhaustively tested).
7497  // float4 result = as_float4(as_int4(xf*recip) + 0x89);
7498  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7499  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7500  N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
7501  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7502  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7503  // Convert back to integer and return.
7504  // return vmovn_s32(vcvt_s32_f32(result));
7505  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7506  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7507  return N0;
7508 }
7509 
7510 static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
7511  EVT VT = Op.getValueType();
7512  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7513  "unexpected type for custom-lowering ISD::SDIV");
7514 
7515  SDLoc dl(Op);
7516  SDValue N0 = Op.getOperand(0);
7517  SDValue N1 = Op.getOperand(1);
7518  SDValue N2, N3;
7519 
7520  if (VT == MVT::v8i8) {
7521  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
7522  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
7523 
7524  N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7525  DAG.getIntPtrConstant(4, dl));
7526  N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7527  DAG.getIntPtrConstant(4, dl));
7528  N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7529  DAG.getIntPtrConstant(0, dl));
7530  N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7531  DAG.getIntPtrConstant(0, dl));
7532 
7533  N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
7534  N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
7535 
7536  N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7537  N0 = LowerCONCAT_VECTORS(N0, DAG);
7538 
7539  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
7540  return N0;
7541  }
7542  return LowerSDIV_v4i16(N0, N1, dl, DAG);
7543 }
7544 
7545 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
7546  // TODO: Should this propagate fast-math-flags?
7547  EVT VT = Op.getValueType();
7548  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7549  "unexpected type for custom-lowering ISD::UDIV");
7550 
7551  SDLoc dl(Op);
7552  SDValue N0 = Op.getOperand(0);
7553  SDValue N1 = Op.getOperand(1);
7554  SDValue N2, N3;
7555 
7556  if (VT == MVT::v8i8) {
7557  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
7558  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
7559 
7560  N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7561  DAG.getIntPtrConstant(4, dl));
7562  N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7563  DAG.getIntPtrConstant(4, dl));
7564  N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7565  DAG.getIntPtrConstant(0, dl));
7566  N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7567  DAG.getIntPtrConstant(0, dl));
7568 
7569  N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
7570  N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
7571 
7572  N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7573  N0 = LowerCONCAT_VECTORS(N0, DAG);
7574 
7575  N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
7576  DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
7577  MVT::i32),
7578  N0);
7579  return N0;
7580  }
7581 
7582  // v4i16 udiv ... Convert to float.
7583  // float4 yf = vcvt_f32_s32(vmovl_u16(y));
7584  // float4 xf = vcvt_f32_s32(vmovl_u16(x));
7585  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
7586  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
7587  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7588  SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7589 
7590  // Use reciprocal estimate and two refinement steps.
7591  // float4 recip = vrecpeq_f32(yf);
7592  // recip *= vrecpsq_f32(yf, recip);
7593  // recip *= vrecpsq_f32(yf, recip);
7594  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7595  DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7596  BN1);
7597  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7598  DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7599  BN1, N2);
7600  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7601  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7602  DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7603  BN1, N2);
7604  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7605  // Simply multiplying by the reciprocal estimate can leave us a few ulps
7606  // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
7607  // and that it will never cause us to return an answer too large).
7608  // float4 result = as_float4(as_int4(xf*recip) + 2);
7609  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7610  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7611  N1 = DAG.getConstant(2, dl, MVT::v4i32);
7612  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7613  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7614  // Convert back to integer and return.
7615  // return vmovn_u32(vcvt_s32_f32(result));
7616  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7617  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7618  return N0;
7619 }
7620 
7621 static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
7622  SDNode *N = Op.getNode();
7623  EVT VT = N->getValueType(0);
7624  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
7625 
7626  SDValue Carry = Op.getOperand(2);
7627 
7628  SDLoc DL(Op);
7629 
7630  SDValue Result;
7631  if (Op.getOpcode() == ISD::ADDCARRY) {
7632  // This converts the boolean value carry into the carry flag.
7633  Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
7634 
7635  // Do the addition proper using the carry flag we wanted.
7636  Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
7637  Op.getOperand(1), Carry);
7638 
7639  // Now convert the carry flag into a boolean value.
7640  Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
7641  } else {
7642  // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
7643  // have to invert the carry first.
7644  Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
7645  DAG.getConstant(1, DL, MVT::i32), Carry);
7646  // This converts the boolean value carry into the carry flag.
7647  Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
7648 
7649  // Do the subtraction proper using the carry flag we wanted.
7650  Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
7651  Op.getOperand(1), Carry);
7652 
7653  // Now convert the carry flag into a boolean value.
7654  Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
7655  // But the carry returned by ARMISD::SUBE is not a borrow as expected
7656  // by ISD::SUBCARRY, so compute 1 - C.
7657  Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
7658  DAG.getConstant(1, DL, MVT::i32), Carry);
7659  }
7660 
7661  // Return both values.
7662  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
7663 }
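// Scalar model of the borrow/carry flip above (a sketch, not LLVM code):
// ISD::SUBCARRY traffics in a borrow bit, while ARM's SBC consumes a carry
// flag that is the inverted borrow, hence the two 1 - C conversions.
#include <cstdint>
static uint32_t subcarryModel(uint32_t A, uint32_t B, uint32_t BorrowIn,
                              uint32_t &BorrowOut) {
  uint32_t CarryIn = 1u - BorrowIn;                  // borrow -> ARM carry
  uint64_t Wide = (uint64_t)A - B - (1u - CarryIn);  // what SBC computes
  uint32_t CarryOut = (Wide >> 32) ? 0u : 1u;        // no wrap <=> carry set
  BorrowOut = 1u - CarryOut;                         // ARM carry -> borrow
  return (uint32_t)Wide;
}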
7664 
7665 SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
7666  assert(Subtarget->isTargetDarwin());
7667 
7668  // For iOS, we want to call an alternative entry point: __sincos_stret,
7669  // whose return values are passed via sret.
7670  SDLoc dl(Op);
7671  SDValue Arg = Op.getOperand(0);
7672  EVT ArgVT = Arg.getValueType();
7673  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
7674  auto PtrVT = getPointerTy(DAG.getDataLayout());
7675 
7676  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7677  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7678 
7679  // Pair of floats / doubles used to pass the result.
7680  Type *RetTy = StructType::get(ArgTy, ArgTy);
7681  auto &DL = DAG.getDataLayout();
7682 
7683  ArgListTy Args;
7684  bool ShouldUseSRet = Subtarget->isAPCS_ABI();
7685  SDValue SRet;
7686  if (ShouldUseSRet) {
7687  // Create stack object for sret.
7688  const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
7689  const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
7690  int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
7691  SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
7692 
7693  ArgListEntry Entry;
7694  Entry.Node = SRet;
7695  Entry.Ty = RetTy->getPointerTo();
7696  Entry.IsSExt = false;
7697  Entry.IsZExt = false;
7698  Entry.IsSRet = true;
7699  Args.push_back(Entry);
7700  RetTy = Type::getVoidTy(*DAG.getContext());
7701  }
7702 
7703  ArgListEntry Entry;
7704  Entry.Node = Arg;
7705  Entry.Ty = ArgTy;
7706  Entry.IsSExt = false;
7707  Entry.IsZExt = false;
7708  Args.push_back(Entry);
7709 
7710  RTLIB::Libcall LC =
7711  (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
7712  const char *LibcallName = getLibcallName(LC);
7713  CallingConv::ID CC = getLibcallCallingConv(LC);
7714  SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
7715 
7716  TargetLowering::CallLoweringInfo CLI(DAG);
7717  CLI.setDebugLoc(dl)
7718  .setChain(DAG.getEntryNode())
7719  .setCallee(CC, RetTy, Callee, std::move(Args))
7720  .setDiscardResult(ShouldUseSRet);
7721  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
7722 
7723  if (!ShouldUseSRet)
7724  return CallResult.first;
7725 
7726  SDValue LoadSin =
7727  DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
7728 
7729  // Address of cos field.
7730  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
7731  DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
7732  SDValue LoadCos =
7733  DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
7734 
7735  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
7736  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
7737  LoadSin.getValue(0), LoadCos.getValue(0));
7738 }
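// Rough C-level shape of the call built above (a sketch; "sincos_fn" is a
// stand-in for whatever getLibcallName(LC) returns):
//   struct SinCosF32 { float Sin, Cos; };
//   APCS:      void      sincos_fn(struct SinCosF32 *sret, float x);
//   AAPCS-VFP: SinCosF32 sincos_fn(float x);
// With ShouldUseSRet the caller passes a stack slot and loads Sin and Cos
// back out; otherwise the pair comes back directly from the call.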
7739 
7740 SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
7741  bool Signed,
7742  SDValue &Chain) const {
7743  EVT VT = Op.getValueType();
7744  assert((VT == MVT::i32 || VT == MVT::i64) &&
7745  "unexpected type for custom lowering DIV");
7746  SDLoc dl(Op);
7747 
7748  const auto &DL = DAG.getDataLayout();
7749  const auto &TLI = DAG.getTargetLoweringInfo();
7750 
7751  const char *Name = nullptr;
7752  if (Signed)
7753  Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
7754  else
7755  Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
7756 
7757  SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
7758 
7759  ARMTargetLowering::ArgListTy Args;
7760 
7761  for (auto AI : {1, 0}) {
7762  ArgListEntry Arg;
7763  Arg.Node = Op.getOperand(AI);
7764  Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
7765  Args.push_back(Arg);
7766  }
7767 
7768  CallLoweringInfo CLI(DAG);
7769  CLI.setDebugLoc(dl)
7770  .setChain(Chain)
7771  .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
7772  ES, std::move(Args));
7773 
7774  return LowerCallTo(CLI).first;
7775 }
7776 
7777 // This is a code size optimisation: return the original SDIV node to
7778 // DAGCombiner when we don't want to expand SDIV into a sequence of
7779 // instructions, and an empty node otherwise, which will cause the
7780 // SDIV to be expanded in DAGCombine.
7781 SDValue
7782 ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
7783  SelectionDAG &DAG,
7784  SmallVectorImpl<SDNode *> &Created) const {
7785  // TODO: Support SREM
7786  if (N->getOpcode() != ISD::SDIV)
7787  return SDValue();
7788 
7789  const auto &ST = static_cast<const ARMSubtarget&>(DAG.getSubtarget());
7790  const auto &MF = DAG.getMachineFunction();
7791  const bool MinSize = MF.getFunction().optForMinSize();
7792  const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
7793  : ST.hasDivideInARMMode();
7794 
7795  // Don't touch vector types; rewriting this may lead to scalarizing
7796  // the int divs.
7797  if (N->getOperand(0).getValueType().isVector())
7798  return SDValue();
7799 
7800  // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
7801  // hwdiv support for this to be really profitable.
7802  if (!(MinSize && HasDivide))
7803  return SDValue();
7804 
7805  // ARM mode is a bit simpler than Thumb: we can handle large power
7806  // of 2 immediates with 1 mov instruction; no further checks required,
7807  // just return the sdiv node.
7808  if (!ST.isThumb())
7809  return SDValue(N, 0);
7810 
7811  // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
7812 // and thus lose the code size benefits of a MOVS that requires only 2 bytes.
7813  // TargetTransformInfo's 'getIntImmCodeSizeCost' could be helpful here,
7814  // but since this check does exactly that, it's not worth querying TTI.
7815  if (Divisor.sgt(128))
7816  return SDValue();
7817 
7818  return SDValue(N, 0);
7819 }
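// Editor's note (not part of the LLVM source): as a concrete example, at
// minsize on a Thumb target with hardware divide, "x / 16" keeps the SDIV
// node and can be selected as roughly
//
//   movs r1, #16    ; narrow immediate move (divisor <= 128 per the check)
//   sdiv r0, r0, r1
//
// whereas the generic power-of-two expansion would need a longer
// shift/add/shift sequence, defeating the -Oz intent.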
7820 
7821 SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
7822  bool Signed) const {
7823  assert(Op.getValueType() == MVT::i32 &&
7824  "unexpected type for custom lowering DIV");
7825  SDLoc dl(Op);
7826 
7827  SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
7828  DAG.getEntryNode(), Op.getOperand(1));
7829 
7830  return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7831 }
7832 
7833 static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
7834  SDLoc DL(N);
7835  SDValue Op = N->getOperand(1);
7836  if (N->getValueType(0) == MVT::i32)
7837  return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
7838  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7839  DAG.getConstant(0, DL, MVT::i32));
7840  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7841  DAG.getConstant(1, DL, MVT::i32));
7842  return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
7843  DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
7844 }
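// Editor's note (not part of the LLVM source): for an i64 denominator the
// code above feeds (Lo | Hi) to WIN__DBZCHK, so the divide-by-zero trap
// fires exactly when the whole 64-bit value is zero, not when either half
// happens to be zero on its own.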
7845 
7846 void ARMTargetLowering::ExpandDIV_Windows(
7847  SDValue Op, SelectionDAG &DAG, bool Signed,
7848  SmallVectorImpl<SDValue> &Results) const {
7849  const auto &DL = DAG.getDataLayout();
7850  const auto &TLI = DAG.getTargetLoweringInfo();
7851 
7852  assert(Op.getValueType() == MVT::i64 &&
7853  "unexpected type for custom lowering DIV");
7854  SDLoc dl(Op);
7855 
7856  SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
7857 
7858  SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7859 
7860  SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
7861  SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
7862  DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
7863  Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
7864 
7865  Results.push_back(Lower);
7866  Results.push_back(Upper);
7867 }
7868 
7869 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
7870  if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
7871  // Acquire/Release load/store is not legal for targets without a dmb or
7872  // equivalent available.
7873  return SDValue();
7874 
7875  // Monotonic load/store is legal for all targets.
7876  return Op;
7877 }
7878 
7879 static void ReplaceREADCYCLECOUNTER(SDNode *N,
7880  SmallVectorImpl<SDValue> &Results,
7881  SelectionDAG &DAG,
7882  const ARMSubtarget *Subtarget) {
7883  SDLoc DL(N);
7884  // Under Power Management extensions, the cycle-count is:
7885  // mrc p15, #0, <Rt>, c9, c13, #0
7886  SDValue Ops[] = { N->getOperand(0), // Chain
7887  DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
7888  DAG.getConstant(15, DL, MVT::i32),
7889  DAG.getConstant(0, DL, MVT::i32),
7890  DAG.getConstant(9, DL, MVT::i32),
7891  DAG.getConstant(13, DL, MVT::i32),
7892  DAG.getConstant(0, DL, MVT::i32)
7893  };
7894 
7895  SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
7896  DAG.getVTList(MVT::i32, MVT::Other), Ops);
7897  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
7898  DAG.getConstant(0, DL, MVT::i32)));
7899  Results.push_back(Cycles32.getValue(1));
7900 }
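// Editor's note (not part of the LLVM source): the operand list above is the
// encoding of "mrc p15, #0, <Rt>, c9, c13, #0", a read of the PMU cycle
// counter (PMCCNTR). Since that register is only 32 bits wide, the
// BUILD_PAIR widens it to the i64 that READCYCLECOUNTER must produce by
// pinning the high word to zero.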
7901 
7902 static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
7903  SDLoc dl(V.getNode());
7904  SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
7905  SDValue VHi = DAG.getAnyExtOrTrunc(
7906  DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
7907  dl, MVT::i32);
7908  bool isBigEndian = DAG.getDataLayout().isBigEndian();
7909  if (isBigEndian)
7910  std::swap (VLo, VHi);
7911  SDValue RegClass =
7912  DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
7913  SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
7914  SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
7915  const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
7916  return SDValue(
7917  DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
7918 }
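// Editor's note (not part of the LLVM source): the REG_SEQUENCE above packs
// the two i32 halves into a single untyped GPRPair operand (an even/odd
// register pair such as r4/r5), which is the operand shape the 64-bit
// exclusive-access pseudos like CMP_SWAP_64 require; the std::swap keeps
// the halves in storage order on big-endian targets.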
7919 
7920 static void ReplaceCMP_SWAP_64Results(SDNode *N,
7921  SmallVectorImpl<SDValue> & Results,
7922  SelectionDAG &DAG) {
7923  assert(N->getValueType(0) == MVT::i64 &&
7924  "AtomicCmpSwap on types less than 64 should be legal");
7925  SDValue Ops[] = {N->getOperand(1),
7926  createGPRPairNode(DAG, N->getOperand(2)),
7927  createGPRPairNode(DAG, N->getOperand(3)),
7928  N->getOperand(0)};
7929  SDNode *CmpSwap = DAG.getMachineNode(
7930  ARM::CMP_SWAP_64, SDLoc(N),
7931  DAG.getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
7932 
7933  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
7934  DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
7935 
7936  bool isBigEndian = DAG.getDataLayout().isBigEndian();
7937 
7938  Results.push_back(
7939  DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
7940  SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
7941  Results.push_back(
7942  DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
7943  SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
7944  Results.push_back(SDValue(CmpSwap, 2));
7945 }
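// Editor's note (not part of the LLVM source): CMP_SWAP_64 is later expanded
// into an ldrexd/strexd retry loop. The two extract_subregs above undo
// createGPRPairNode for its result: they split the untyped pair back into
// the lo/hi i32 values (mirrored on big-endian) that users of the original
// i64 cmpxchg expect, followed by the output chain.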
7946 
7947 static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
7948  SelectionDAG &DAG) {
7949  const auto &TLI = DAG.getTargetLoweringInfo();
7950 
7951  assert(Subtarget.getTargetTriple().isOSMSVCRT() &&
7952  "Custom lowering is MSVCRT specific!");
7953 
7954  SDLoc dl(Op);
7955  SDValue Val = Op.getOperand(0);
7956  MVT Ty = Val->getSimpleValueType(0);
7957  SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1));
7958  SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow",
7959  TLI.getPointerTy(DAG.getDataLayout()));
7960 
7961  TargetLowering::ArgListTy Args;
7962  TargetLowering::ArgListEntry Entry;
7963 
7964  Entry.Node = Val;
7965  Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
7966  Entry.IsZExt = true;
7967  Args.push_back(Entry);
7968 
7969  Entry.Node = Exponent;
7970  Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
7971  Entry.IsZExt = true;
7972  Args.push_back(Entry);
7973 
7974  Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext());
7975 
7976  // The in-chain to the call is the entry node.  If we are emitting a
7977  // tailcall, the chain will be mutated if the node has a non-entry input
7978  // chain.
7979  SDValue InChain = DAG.getEntryNode();
7980  SDValue TCChain = InChain;
7981 
7982  const Function &F = DAG.getMachineFunction().getFunction();
7983  bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
7984  F.getReturnType() == LCRTy;
7985  if (IsTC)
7986  InChain = TCChain;
7987 
7988  TargetLowering::CallLoweringInfo CLI(DAG);
7989  CLI.setDebugLoc(dl)
7990  .setChain(InChain)
7991  .setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args))
7992  .setTailCall(IsTC);
7993  std::pair<SDValue, SDValue> CI = TLI.LowerCallTo(CLI);
7994 
7995  // Return the chain (the DAG root) if it is a tail call
7996  return !CI.second.getNode() ? DAG.getRoot() : CI.first;
7997 }
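// Editor's note (not part of the LLVM source): the net effect on MSVCRT
// targets is that llvm.powi.* is lowered as an ordinary pow libcall with
// the i32 exponent converted up front, roughly
//
//   Result = powf(Val, (float)Exp);   // f32, or pow(...) for f64
//
// and it is emitted as a tail call when the node is in tail-call position.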
7998 
7999 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
8000  LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
8001  switch (Op.getOpcode()) {
8002  default: llvm_unreachable("Don't know how to custom lower this!");
8003  case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
8004  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
8005  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
8006  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
8007  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
8008  case ISD::SELECT: return LowerSELECT(Op, DAG);
8009  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
8010  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
8011  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
8012  case ISD::BR_JT: return LowerBR_JT(Op, DAG);
8013  case ISD::VASTART: return LowerVASTART(Op, DAG);
8014  case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
8015  case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
8016  case ISD::SINT_TO_FP:
8017  case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
8018  case ISD::FP_TO_SINT:
8019  case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
8020  case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
8021  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
8022  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
8023  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
8024  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
8025  case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
8026  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
8027  Subtarget);
8028  case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
8029  case ISD::SHL:
8030  case ISD::SRL:
8031  case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
8032  case ISD::SREM: return LowerREM(Op.getNode(), DAG);
8033  case ISD::UREM: return LowerREM(Op.getNode(), DAG);
8034  case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
8035  case ISD::SRL_PARTS:
8036  case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
8037  case ISD::CTTZ:
8038  case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
8039  case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
8040  case ISD::SETCC: return LowerVSETCC(Op, DAG);
8041  case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
8042  case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
8043  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
8044  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
8045  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
8046  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
8047  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
8048  case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
8049  case ISD::MUL: return LowerMUL(Op, DAG);
8050  case ISD::SDIV:
8051  if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
8052  return LowerDIV_Windows(Op, DAG, /* Signed */ true);
8053  return LowerSDIV(Op, DAG);
8054  case ISD::UDIV:
8055  if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
8056  return LowerDIV_Windows(Op, DAG, /* Signed */ false);
8057  return LowerUDIV(Op, DAG);
8058  case ISD::ADDCARRY:
8059  case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
8060  case ISD::SADDO:
8061  case ISD::SSUBO:
8062  return LowerSignedALUO(Op, DAG);
8063  case ISD::UADDO:
8064  case ISD::USUBO:
8065  return LowerUnsignedALUO(Op, DAG);
8066  case ISD::ATOMIC_LOAD:
8067  case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
8068  case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
8069  case ISD::SDIVREM:
8070  case ISD::UDIVREM: return LowerDivRem(Op, DAG);
8071  case ISD::DYNAMIC_STACKALLOC:
8072  if (Subtarget->isTargetWindows())
8073  return LowerDYNAMIC_STACKALLOC(Op, DAG);
8074  llvm_unreachable("Don't know how to custom lower this!");
8075  case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
8076  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
8077  case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG);
8078  case ARMISD::WIN__DBZCHK: return SDValue();
8079  }
8080 }
8081 
8082 static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
8083  SelectionDAG &DAG) {
8084  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
8085  unsigned Opc = 0;
8086  if (IntNo == Intrinsic::arm_smlald)
8087  Opc = ARMISD::SMLALD;
8088  else if (IntNo == Intrinsic::arm_smlaldx)
8089  Opc = ARMISD::SMLALDX;
8090  else if (IntNo == Intrinsic::arm_smlsld)
8091  Opc = ARMISD::SMLSLD;
8092  else if (IntNo == Intrinsic::arm_smlsldx)
8093  Opc = ARMISD::SMLSLDX;
8094  else
8095  return;
8096 
8097  SDLoc dl(N);
8098  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
8099  N->getOperand(3),
8100  DAG.getConstant(0, dl, MVT::i32));
8101  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
8102  N->getOperand(3),
8103  DAG.getConstant(1, dl, MVT::i32));
8104 
8105  SDValue LongMul = DAG.getNode(Opc, dl,
8106  DAG.getVTList(MVT::i32, MVT::i32),
8107  N->getOperand(1), N->getOperand(2),
8108  Lo, Hi);
8109  Results.push_back(LongMul.getValue(0));
8110  Results.push_back(LongMul.getValue(1));
8111 }
8112 
8113 /// ReplaceNodeResults - Replace the results of node with an illegal result
8114 /// type with new values built out of custom code.
8115 void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
8116  SmallVectorImpl<SDValue> &Results,
8117  SelectionDAG &DAG) const {
8118  SDValue Res;
8119  switch (N->getOpcode()) {
8120  default:
8121  llvm_unreachable("Don't know how to custom expand this!");
8122  case ISD::READ_REGISTER:
8123  ExpandREAD_REGISTER(N, Results, DAG);
8124  break;
8125  case ISD::BITCAST:
8126  Res = ExpandBITCAST(N, DAG, Subtarget);
8127  break;
8128  case ISD::SRL:
8129  case ISD::SRA:
8130  Res = Expand64BitShift(N, DAG, Subtarget);
8131  break;
8132  case ISD::SREM:
8133  case ISD::UREM:
8134  Res = LowerREM(N, DAG);
8135  break;
8136  case ISD::SDIVREM:
8137  case ISD::UDIVREM:
8138  Res = LowerDivRem(SDValue(N, 0), DAG);
8139  assert(Res.getNumOperands() == 2 && "DivRem needs two values");
8140  Results.push_back(Res.getValue(0));
8141  Results.push_back(Res.getValue(1));
8142  return;
8143  case ISD::READCYCLECOUNTER:
8144  ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
8145  return;
8146  case ISD::UDIV:
8147  case ISD::SDIV:
8148  assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
8149  return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
8150  Results);
8151  case ISD::ATOMIC_CMP_SWAP:
8152  ReplaceCMP_SWAP_64Results(N, Results, DAG);
8153  return;
8154  case ISD::INTRINSIC_WO_CHAIN:
8155  return ReplaceLongIntrinsic(N, Results, DAG);
8156  }
8157  if (Res.getNode())
8158  Results.push_back(Res);
8159 }
8160 
8161 //===----------------------------------------------------------------------===//
8162 // ARM Scheduler Hooks
8163 //===----------------------------------------------------------------------===//
8164 
8165 /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
8166 /// registers the function context.
8167 void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
8168  MachineBasicBlock *MBB,
8169  MachineBasicBlock *DispatchBB,
8170  int FI) const {
8171  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
8172  "ROPI/RWPI not currently supported with SjLj");
8173  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
8174  DebugLoc dl = MI.getDebugLoc();
8175  MachineFunction *MF = MBB->getParent();
8176  MachineRegisterInfo *MRI = &MF->getRegInfo();
8177  MachineConstantPool *MCP = MF->getConstantPool();
8178  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
8179  const Function &F = MF->getFunction();
8180 
8181  bool isThumb = Subtarget->isThumb();
8182  bool isThumb2 = Subtarget->isThumb2();
8183 
8184  unsigned PCLabelId = AFI->createPICLabelUId();
8185  unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
8186  ARMConstantPoolValue *CPV =
8187  ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
8188  unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
8189 
8190  const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
8191  : &ARM::GPRRegClass;
8192 
8193  // Grab constant pool and fixed stack memory operands.
8194  MachineMemOperand *CPMMO =
8195  MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
8196  MachineMemOperand::MOLoad, 4, 4);
8197 
8198  MachineMemOperand *FIMMOSt =
8199  MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
8200  MachineMemOperand::MOStore, 4, 4);
8201 
8202  // Load the address of the dispatch MBB into the jump buffer.
8203  if (isThumb2) {
8204  // Incoming value: jbuf
8205  // ldr.n r5, LCPI1_1
8206  // orr r5, r5, #1
8207  // add r5, pc
8208  // str r5, [$jbuf, #+4] ; &jbuf[1]
8209  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8210  BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
8211  .addConstantPoolIndex(CPI)
8212  .addMemOperand(CPMMO)
8213  .add(predOps(ARMCC::AL));
8214  // Set the low bit because of thumb mode.
8215  unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
8216  BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
8217  .addReg(NewVReg1, RegState::Kill)
8218  .addImm(0x01)
8219  .add(predOps(ARMCC::AL))
8220  .add(condCodeOp());
8221  unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
8222  BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
8223  .addReg(NewVReg2, RegState::Kill)
8224  .addImm(PCLabelId);
8225  BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
8226  .addReg(NewVReg3, RegState::Kill)
8227  .addFrameIndex(FI)
8228  .addImm(36) // &jbuf[1] :: pc
8229  .addMemOperand(FIMMOSt)
8230  .add(predOps(ARMCC::AL));
8231  } else if (isThumb) {
8232  // Incoming value: jbuf
8233  // ldr.n r1, LCPI1_4
8234  // add r1, pc
8235  // mov r2, #1
8236  // orrs r1, r2
8237  // add r2, $jbuf, #+4 ; &jbuf[1]
8238  // str r1, [r2]
8239  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8240  BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
8241  .addConstantPoolIndex(CPI)
8242  .addMemOperand(CPMMO)
8243  .add(predOps(ARMCC::AL));
8244  unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
8245  BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
8246  .addReg(NewVReg1, RegState::Kill)
8247  .addImm(PCLabelId);
8248  // Set the low bit because of thumb mode.
8249  unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
8250  BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
8251  .addReg(ARM::CPSR, RegState::Define)
8252  .addImm(1)
8253  .add(predOps(ARMCC::AL));
8254  unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
8255  BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
8256  .addReg(ARM::CPSR, RegState::Define)
8257  .addReg(NewVReg2, RegState::Kill)
8258  .addReg(NewVReg3, RegState::Kill)
8259  .add(predOps(ARMCC::AL));
8260  unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
8261  BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
8262  .addFrameIndex(FI)
8263  .addImm(36); // &jbuf[1] :: pc
8264  BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
8265  .addReg(NewVReg4, RegState::Kill)
8266  .addReg(NewVReg5, RegState::Kill)
8267  .addImm(0)
8268  .addMemOperand(FIMMOSt)
8269  .add(predOps(ARMCC::AL));
8270  } else {
8271  // Incoming value: jbuf
8272  // ldr r1, LCPI1_1
8273  // add r1, pc, r1
8274  // str r1, [$jbuf, #+4] ; &jbuf[1]
8275  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8276  BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
8277  .addConstantPoolIndex(CPI)
8278  .addImm(0)
8279  .addMemOperand(CPMMO)
8280  .add(predOps(ARMCC::AL));
8281  unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
8282  BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
8283  .addReg(NewVReg1, RegState::Kill)
8284  .addImm(PCLabelId)
8285  .add(predOps(ARMCC::AL));
8286  BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
8287  .addReg(NewVReg2, RegState::Kill)
8288  .addFrameIndex(FI)
8289  .addImm(36) // &jbuf[1] :: pc
8290  .addMemOperand(FIMMOSt)
8291  .add(predOps(ARMCC::AL));
8292  }
8293 }
8294 
8295 void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
8296  MachineBasicBlock *MBB) const {
8297  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
8298  DebugLoc dl = MI.getDebugLoc();
8299  MachineFunction *MF = MBB->getParent();
8300  MachineRegisterInfo *MRI = &MF->getRegInfo();
8301  MachineFrameInfo &MFI = MF->getFrameInfo();
8302  int FI = MFI.getFunctionContextIndex();
8303 
8304  const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
8305  : &ARM::GPRnopcRegClass;
8306 
8307  // Get a mapping of the call site numbers to all of the landing pads they're
8308  // associated with.
8309  DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
8310  unsigned MaxCSNum = 0;
8311  for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
8312  ++BB) {
8313  if (!BB->isEHPad()) continue;
8314 
8315  // FIXME: We should assert that the EH_LABEL is the first MI in the landing
8316  // pad.
8317  for (MachineBasicBlock::iterator
8318  II = BB->begin(), IE = BB->end(); II != IE; ++II) {
8319  if (!II->isEHLabel()) continue;
8320 
8321  MCSymbol *Sym = II->getOperand(0).getMCSymbol();
8322  if (!MF->hasCallSiteLandingPad(Sym)) continue;
8323 
8324  SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
8325  for (SmallVectorImpl<unsigned>::iterator
8326  CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
8327  CSI != CSE; ++CSI) {
8328  CallSiteNumToLPad[*CSI].push_back(&*BB);
8329  MaxCSNum = std::max(MaxCSNum, *CSI);
8330  }
8331  break;
8332  }
8333  }
8334 
8335  // Get an ordered list of the machine basic blocks for the jump table.
8336  std::vector<MachineBasicBlock*> LPadList;
8337  SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;
8338  LPadList.reserve(CallSiteNumToLPad.size());
8339  for (unsigned I = 1; I <= MaxCSNum; ++I) {
8340  SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
8341  for (SmallVectorImpl<MachineBasicBlock*>::iterator
8342  II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
8343  LPadList.push_back(*II);
8344  InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
8345  }
8346  }
8347 
8348  assert(!LPadList.empty() &&
8349  "No landing pad destinations for the dispatch jump table!");
8350 
8351  // Create the jump table and associated information.
8352  MachineJumpTableInfo *JTI =
8353  MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
8354  unsigned MJTI = JTI->createJumpTableIndex(LPadList);
8355 
8356  // Create the MBBs for the dispatch code.
8357 
8358  // Shove the dispatch's address into the return slot in the function context.
8359  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
8360  DispatchBB->setIsEHPad();
8361 
8362  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
8363  unsigned trap_opcode;
8364  if (Subtarget->isThumb())
8365  trap_opcode = ARM::tTRAP;
8366  else
8367  trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
8368 
8369  BuildMI(TrapBB, dl, TII->get(trap_opcode));
8370  DispatchBB->addSuccessor(TrapBB);
8371 
8372  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
8373  DispatchBB->addSuccessor(DispContBB);
8374 
8375  // Insert the MBBs into the function.
8376  MF->insert(MF->end(), DispatchBB);
8377  MF->insert(MF->end(), DispContBB);
8378  MF->insert(MF->end(), TrapBB);
8379 
8380  // Insert code into the entry block that creates and registers the function
8381  // context.
8382  SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
8383 
8384  MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
8385  MachinePointerInfo::getFixedStack(*MF, FI),
8386  MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4);
8387 
8388  MachineInstrBuilder MIB;
8389  MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
8390 
8391  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
8392  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
8393 
8394  // Add a register mask with no preserved registers. This results in all
8395  // registers being marked as clobbered. This can't work if the dispatch block
8396  // is in a Thumb1 function and is linked with ARM code which uses the FP
8397  // registers, as there is no way to preserve the FP registers in Thumb1 mode.
8398  MIB.addRegMask(RI.getNoPreservedMask());
8399 
8400  bool IsPositionIndependent = isPositionIndependent();
8401  unsigned NumLPads = LPadList.size();
8402  if (Subtarget->isThumb2()) {
8403  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8404  BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
8405  .addFrameIndex(FI)
8406  .addImm(4)
8407  .addMemOperand(FIMMOLd)
8408  .add(predOps(ARMCC::AL));
8409 
8410  if (NumLPads < 256) {
8411  BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
8412  .addReg(NewVReg1)
8413  .addImm(LPadList.size())
8414  .add(predOps(ARMCC::AL));
8415  } else {
8416  unsigned VReg1 = MRI->createVirtualRegister(TRC);
8417  BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
8418  .addImm(NumLPads & 0xFFFF)
8419  .add(predOps(ARMCC::AL));
8420 
8421  unsigned VReg2 = VReg1;
8422  if ((NumLPads & 0xFFFF0000) != 0) {
8423  VReg2 = MRI->createVirtualRegister(TRC);
8424  BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
8425  .addReg(VReg1)
8426  .addImm(NumLPads >> 16)
8427  .add(predOps(ARMCC::AL));
8428  }
8429 
8430  BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
8431  .addReg(NewVReg1)
8432  .addReg(VReg2)
8433  .add(predOps(ARMCC::AL));
8434  }
8435 
8436  BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
8437  .addMBB(TrapBB)
8438  .addImm(ARMCC::HI)
8439  .addReg(ARM::CPSR);
8440 
8441  unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
8442  BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
8443  .addJumpTableIndex(MJTI)
8444  .add(predOps(ARMCC::AL));
8445 
8446  unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
8447  BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
8448  .addReg(NewVReg3, RegState::Kill)
8449  .addReg(NewVReg1)
8450  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
8451  .add(predOps(ARMCC::AL))
8452  .add(condCodeOp());
8453 
8454  BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
8455  .addReg(NewVReg4, RegState::Kill)
8456  .addReg(NewVReg1)
8457  .addJumpTableIndex(MJTI);
8458  } else if (Subtarget->isThumb()) {
8459  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8460  BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
8461  .addFrameIndex(FI)
8462  .addImm(1)
8463  .addMemOperand(FIMMOLd)
8464  .add(predOps(ARMCC::AL));
8465 
8466  if (NumLPads < 256) {
8467  BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
8468  .addReg(NewVReg1)
8469  .addImm(NumLPads)
8470  .add(predOps(ARMCC::AL));
8471  } else {
8472  MachineConstantPool *ConstantPool = MF->getConstantPool();
8473  Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
8474  const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
8475 
8476  // MachineConstantPool wants an explicit alignment.
8477  unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
8478  if (Align == 0)
8479  Align = MF->getDataLayout().getTypeAllocSize(C->getType());
8480  unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
8481 
8482  unsigned VReg1 = MRI->createVirtualRegister(TRC);
8483  BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
8484  .addReg(VReg1, RegState::Define)
8485  .addConstantPoolIndex(Idx)
8486  .add(predOps(ARMCC::AL));
8487  BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
8488  .addReg(NewVReg1)
8489  .addReg(VReg1)
8490  .add(predOps(ARMCC::AL));
8491  }
8492 
8493  BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
8494  .addMBB(TrapBB)
8495  .addImm(ARMCC::HI)
8496  .addReg(ARM::CPSR);
8497 
8498  unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
8499  BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
8500  .addReg(ARM::CPSR, RegState::Define)
8501  .addReg(NewVReg1)
8502  .addImm(2)
8503  .add(predOps(ARMCC::AL));
8504 
8505  unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
8506  BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
8507  .addJumpTableIndex(MJTI)
8508  .add(predOps(ARMCC::AL));
8509 
8510  unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
8511  BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
8512  .addReg(ARM::CPSR, RegState::Define)
8513  .addReg(NewVReg2, RegState::Kill)
8514  .addReg(NewVReg3)
8515  .add(predOps(ARMCC::AL));
8516 
8517  MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
8518  MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
8519 
8520  unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
8521  BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
8522  .addReg(NewVReg4, RegState::Kill)
8523  .addImm(0)
8524  .addMemOperand(JTMMOLd)
8525  .add(predOps(ARMCC::AL));
8526 
8527  unsigned NewVReg6 = NewVReg5;
8528  if (IsPositionIndependent) {
8529  NewVReg6 = MRI->createVirtualRegister(TRC);
8530  BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
8531  .addReg(ARM::CPSR, RegState::Define)
8532  .addReg(NewVReg5, RegState::Kill)
8533  .addReg(NewVReg3)
8534  .add(predOps(ARMCC::AL));
8535  }
8536 
8537  BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
8538  .addReg(NewVReg6, RegState::Kill)
8539  .addJumpTableIndex(MJTI);
8540  } else {
8541  unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
8542  BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
8543  .addFrameIndex(FI)
8544  .addImm(4)
8545  .addMemOperand(FIMMOLd)
8546  .add(predOps(ARMCC::AL));
8547 
8548  if (NumLPads < 256) {
8549  BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
8550  .addReg(NewVReg1)
8551  .addImm(NumLPads)
8552  .add(predOps(ARMCC::AL));
8553  } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
8554  unsigned VReg1 = MRI->createVirtualRegister(TRC);
8555  BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
8556  .addImm(NumLPads & 0xFFFF)
8557  .add(predOps(ARMCC::AL));
8558 
8559  unsigned VReg2 = VReg1;
8560  if ((NumLPads & 0xFFFF0000) != 0) {
8561  VReg2 = MRI->createVirtualRegister(TRC);
8562  BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
8563  .addReg(VReg1)
8564  .addImm(NumLPads >> 16)
8565  .add(predOps(ARMCC::AL));
8566  }
8567 
8568  BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
8569  .addReg(NewVReg1)
8570  .addReg(VReg2)
8571  .add(predOps(ARMCC::AL));
8572  } else {
8573  MachineConstantPool *ConstantPool = MF->getConstantPool();
8574  Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
8575  const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
8576 
8577  // MachineConstantPool wants an explicit alignment.
8578  unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
8579  if (Align == 0)
8580  Align = MF->getDataLayout().getTypeAllocSize(C->getType());
8581  unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
8582 
8583  unsigned VReg1 = MRI->createVirtualRegister(TRC);
8584  BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
8585  .addReg(VReg1, RegState::Define)
8586  .addConstantPoolIndex(Idx)
8587  .addImm(0)
8588  .add(predOps(ARMCC::AL));
8589  BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
8590  .addReg(NewVReg1)
8591  .addReg(VReg1, RegState::Kill)
8592  .add(predOps(ARMCC::AL));
8593  }
8594 
8595  BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
8596  .addMBB(TrapBB)
8597  .addImm(ARMCC::HI)
8598  .addReg(ARM::CPSR);
8599 
8600  unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
8601  BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
8602  .addReg(NewVReg1)
8603  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
8604  .add(predOps(ARMCC::AL))
8605  .add(condCodeOp());
8606  unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
8607  BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
8608  .addJumpTableIndex(MJTI)
8609  .add(predOps(ARMCC::AL));
8610 
8611  MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
8612  MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
8613  unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
8614  BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
8615  .addReg(NewVReg3, RegState::Kill)
8616  .addReg(NewVReg4)
8617  .addImm(0)
8618  .addMemOperand(JTMMOLd)
8619  .add(predOps(ARMCC::AL));
8620 
8621  if (IsPositionIndependent) {
8622  BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
8623  .addReg(NewVReg5, RegState::Kill)
8624  .addReg(NewVReg4)
8625  .addJumpTableIndex(MJTI);
8626  } else {
8627  BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
8628  .addReg(NewVReg5, RegState::Kill)
8629  .addJumpTableIndex(MJTI);
8630  }
8631  }
8632 
8633  // Add the jump table entries as successors to the MBB.
8634  SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
8635  for (std::vector<MachineBasicBlock*>::iterator
8636  I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
8637  MachineBasicBlock *CurMBB = *I;
8638  if (SeenMBBs.insert(CurMBB).second)
8639  DispContBB->addSuccessor(CurMBB);
8640  }
8641 
8642  // N.B. the order the invoke BBs are processed in doesn't matter here.
8643  const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
8644  SmallVector<MachineBasicBlock*, 64> MBBLPads;
8645  for (MachineBasicBlock *BB : InvokeBBs) {
8646 
8647  // Remove the landing pad successor from the invoke block and replace it
8648  // with the new dispatch block.
8649  SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
8650  BB->succ_end());
8651  while (!Successors.empty()) {
8652  MachineBasicBlock *SMBB = Successors.pop_back_val();
8653  if (SMBB->isEHPad()) {
8654  BB->removeSuccessor(SMBB);
8655  MBBLPads.push_back(SMBB);
8656  }
8657  }
8658 
8659  BB->addSuccessor(DispatchBB, BranchProbability::getZero());
8660  BB->normalizeSuccProbs();
8661 
8662  // Find the invoke call and mark all of the callee-saved registers as
8663  // 'implicit defined' so that they're spilled. This prevents code from
8664  // moving instructions to before the EH block, where they will never be
8665  // executed.
8666  for (MachineBasicBlock::reverse_iterator
8667  II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
8668  if (!II->isCall()) continue;
8669 
8670  DenseMap<unsigned, bool> DefRegs;
8671  for (MachineInstr::mop_iterator
8672  OI = II->operands_begin(), OE = II->operands_end();
8673  OI != OE; ++OI) {
8674  if (!OI->isReg()) continue;
8675  DefRegs[OI->getReg()] = true;
8676  }
8677 
8678  MachineInstrBuilder MIB(*MF, &*II);
8679 
8680  for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
8681  unsigned Reg = SavedRegs[i];
8682  if (Subtarget->isThumb2() &&
8683  !ARM::tGPRRegClass.contains(Reg) &&
8684  !ARM::hGPRRegClass.contains(Reg))
8685  continue;
8686  if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
8687  continue;
8688  if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
8689  continue;
8690  if (!DefRegs[Reg])
8691  MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
8692  }
8693 
8694  break;
8695  }
8696  }
8697 
8698  // Mark all former landing pads as non-landing pads. The dispatch is the only
8699  // landing pad now.
8700  for (SmallVectorImpl<MachineBasicBlock*>::iterator
8701  I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
8702  (*I)->setIsEHPad(false);
8703 
8704  // The instruction is gone now.
8705  MI.eraseFromParent();
8706 }
8707 
8708 static
8709 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
8710  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
8711  E = MBB->succ_end(); I != E; ++I)
8712  if (*I != Succ)
8713  return *I;
8714  llvm_unreachable("Expecting a BB with two successors!");
8715 }
8716 
8717 /// Return the load opcode for a given load size. If load size >= 8,
8718 /// a NEON opcode will be returned.
8719 static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
8720  if (LdSize >= 8)
8721  return LdSize == 16 ? ARM::VLD1q32wb_fixed
8722  : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
8723  if (IsThumb1)
8724  return LdSize == 4 ? ARM::tLDRi
8725  : LdSize == 2 ? ARM::tLDRHi
8726  : LdSize == 1 ? ARM::tLDRBi : 0;
8727  if (IsThumb2)
8728  return LdSize == 4 ? ARM::t2LDR_POST
8729  : LdSize == 2 ? ARM::t2LDRH_POST
8730  : LdSize == 1 ? ARM::t2LDRB_POST : 0;
8731  return LdSize == 4 ? ARM::LDR_POST_IMM
8732  : LdSize == 2 ? ARM::LDRH_POST
8733  : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
8734 }
8735 
8736 /// Return the store opcode for a given store size. If store size >= 8,
8737 /// a NEON opcode will be returned.
8738 static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
8739  if (StSize >= 8)
8740  return StSize == 16 ? ARM::VST1q32wb_fixed
8741  : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
8742  if (IsThumb1)
8743  return StSize == 4 ? ARM::tSTRi
8744  : StSize == 2 ? ARM::tSTRHi
8745  : StSize == 1 ? ARM::tSTRBi : 0;
8746  if (IsThumb2)
8747  return StSize == 4 ? ARM::t2STR_POST
8748  : StSize == 2 ? ARM::t2STRH_POST
8749  : StSize == 1 ? ARM::t2STRB_POST : 0;
8750  return StSize == 4 ? ARM::STR_POST_IMM
8751  : StSize == 2 ? ARM::STRH_POST
8752  : StSize == 1 ? ARM::STRB_POST_IMM : 0;
8753 }
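// Editor's note (not part of the LLVM source): a reading aid for the two
// tables above: Thumb1 has no post-indexed load/store encodings, so its
// entries are plain tLDR*/tSTR* opcodes and the emitters below pair them
// with an explicit tADDi8 to advance the address, whereas the ARM, Thumb2
// and NEON entries (_POST / wb_fixed) write the updated address back in
// the same instruction.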
8754 
8755 /// Emit a post-increment load operation with given size. The instructions
8756 /// will be added to BB at Pos.
8757 static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
8758  const TargetInstrInfo *TII, const DebugLoc &dl,
8759  unsigned LdSize, unsigned Data, unsigned AddrIn,
8760  unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
8761  unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
8762  assert(LdOpc != 0 && "Should have a load opcode");
8763  if (LdSize >= 8) {
8764  BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
8765  .addReg(AddrOut, RegState::Define)
8766  .addReg(AddrIn)
8767  .addImm(0)
8768  .add(predOps(ARMCC::AL));
8769  } else if (IsThumb1) {
8770  // load + update AddrIn
8771  BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
8772  .addReg(AddrIn)
8773  .addImm(0)
8774  .add(predOps(ARMCC::AL));
8775  BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
8776  .add(t1CondCodeOp())
8777  .addReg(AddrIn)
8778  .addImm(LdSize)
8779  .add(predOps(ARMCC::AL));
8780  } else if (IsThumb2) {
8781  BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
8782  .addReg(AddrOut, RegState::Define)
8783  .addReg(AddrIn)
8784  .addImm(LdSize)
8785  .add(predOps(ARMCC::AL));
8786  } else { // arm
8787  BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
8788  .addReg(AddrOut, RegState::Define)
8789  .addReg(AddrIn)
8790  .addReg(0)
8791  .addImm(LdSize)
8792  .add(predOps(ARMCC::AL));
8793  }
8794 }
8795 
8796 /// Emit a post-increment store operation with given size. The instructions
8797 /// will be added to BB at Pos.
8798 static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
8799  const TargetInstrInfo *TII, const DebugLoc &dl,
8800  unsigned StSize, unsigned Data, unsigned AddrIn,
8801  unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
8802  unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
8803  assert(StOpc != 0 && "Should have a store opcode");
8804  if (StSize >= 8) {
8805  BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
8806  .addReg(AddrIn)
8807  .addImm(0)
8808  .addReg(Data)
8809  .add(predOps(ARMCC::AL));
8810  } else if (IsThumb1) {
8811  // store + update AddrIn
8812  BuildMI(*BB, Pos, dl, TII->get(StOpc))
8813  .addReg(Data)
8814  .addReg(AddrIn)
8815  .addImm(0)
8816  .add(predOps(ARMCC::AL));
8817  BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
8818  .add(t1CondCodeOp())
8819  .addReg(AddrIn)
8820  .addImm(StSize)
8821  .add(predOps(ARMCC::AL));
8822  } else if (IsThumb2) {
8823  BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
8824  .addReg(Data)
8825  .addReg(AddrIn)
8826  .addImm(StSize)
8827  .add(predOps(ARMCC::AL));
8828  } else { // arm
8829  BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
8830  .addReg(Data)
8831  .addReg(AddrIn)
8832  .addReg(0)
8833  .addImm(StSize)
8834  .add(predOps(ARMCC::AL));
8835  }
8836 }
8837 
8838 MachineBasicBlock *
8839 ARMTargetLowering::EmitStructByval(MachineInstr &MI,
8840  MachineBasicBlock *BB) const {
8841  // This pseudo instruction has 4 operands: dst, src, size, alignment.
8842  // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
8843  // Otherwise, we will generate unrolled scalar copies.
8844  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
8845  const BasicBlock *LLVM_BB = BB->getBasicBlock();
8846  MachineFunction::iterator It = ++BB->getIterator();
8847 
8848  unsigned dest = MI.getOperand(0).getReg();
8849  unsigned src = MI.getOperand(1).getReg();
8850  unsigned SizeVal = MI.getOperand(2).getImm();
8851  unsigned Align = MI.getOperand(3).getImm();
8852  DebugLoc dl = MI.getDebugLoc();
8853 
8854  MachineFunction *MF = BB->getParent();
8855  MachineRegisterInfo &MRI = MF->getRegInfo();
8856  unsigned UnitSize = 0;
8857  const TargetRegisterClass *TRC = nullptr;
8858  const TargetRegisterClass *VecTRC = nullptr;
8859 
8860  bool IsThumb1 = Subtarget->isThumb1Only();
8861  bool IsThumb2 = Subtarget->isThumb2();
8862  bool IsThumb = Subtarget->isThumb();
8863 
8864  if (Align & 1) {
8865  UnitSize = 1;
8866  } else if (Align & 2) {
8867  UnitSize = 2;
8868  } else {
8869  // Check whether we can use NEON instructions.
8870  if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
8871  Subtarget->hasNEON()) {
8872  if ((Align % 16 == 0) && SizeVal >= 16)
8873  UnitSize = 16;
8874  else if ((Align % 8 == 0) && SizeVal >= 8)
8875  UnitSize = 8;
8876  }
8877  // Can't use NEON instructions.
8878  if (UnitSize == 0)
8879  UnitSize = 4;
8880  }
8881 
8882  // Select the correct opcode and register class for unit size load/store
8883  bool IsNeon = UnitSize >= 8;
8884  TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
8885  if (IsNeon)
8886  VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
8887  : UnitSize == 8 ? &ARM::DPRRegClass
8888  : nullptr;
8889 
8890  unsigned BytesLeft = SizeVal % UnitSize;
8891  unsigned LoopSize = SizeVal - BytesLeft;
8892 
8893  if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
8894  // Use LDR and STR to copy.
8895  // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
8896  // [destOut] = STR_POST(scratch, destIn, UnitSize)
8897  unsigned srcIn = src;
8898  unsigned destIn = dest;
8899  for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
8900  unsigned srcOut = MRI.createVirtualRegister(TRC);
8901  unsigned destOut = MRI.createVirtualRegister(TRC);
8902  unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
8903  emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
8904  IsThumb1, IsThumb2);
8905  emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
8906  IsThumb1, IsThumb2);
8907  srcIn = srcOut;
8908  destIn = destOut;
8909  }
8910 
8911  // Handle the leftover bytes with LDRB and STRB.
8912  // [scratch, srcOut] = LDRB_POST(srcIn, 1)
8913  // [destOut] = STRB_POST(scratch, destIn, 1)
8914  for (unsigned i = 0; i < BytesLeft; i++) {
8915  unsigned srcOut = MRI.createVirtualRegister(TRC);
8916  unsigned destOut = MRI.createVirtualRegister(TRC);
8917  unsigned scratch = MRI.createVirtualRegister(TRC);
8918  emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
8919  IsThumb1, IsThumb2);
8920  emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
8921  IsThumb1, IsThumb2);
8922  srcIn = srcOut;
8923  destIn = destOut;
8924  }
8925  MI.eraseFromParent(); // The instruction is gone now.
8926  return BB;
8927  }
8928 
8929  // Expand the pseudo op to a loop.
8930  // thisMBB:
8931  // ...
8932  // movw varEnd, # --> with thumb2
8933  // movt varEnd, #
8934  // ldrcp varEnd, idx --> without thumb2
8935  // fallthrough --> loopMBB
8936  // loopMBB:
8937  // PHI varPhi, varEnd, varLoop
8938  // PHI srcPhi, src, srcLoop
8939  // PHI destPhi, dst, destLoop
8940  // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
8941  // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
8942  // subs varLoop, varPhi, #UnitSize
8943  // bne loopMBB
8944  // fallthrough --> exitMBB
8945  // exitMBB:
8946  // epilogue to handle left-over bytes
8947  // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
8948  // [destOut] = STRB_POST(scratch, destLoop, 1)
8949  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
8950  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
8951  MF->insert(It, loopMBB);
8952  MF->insert(It, exitMBB);
8953 
8954  // Transfer the remainder of BB and its successor edges to exitMBB.
8955  exitMBB->splice(exitMBB->begin(), BB,
8956  std::next(MachineBasicBlock::iterator(MI)), BB->end());
8957  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
8958 
8959  // Load an immediate to varEnd.
8960  unsigned varEnd = MRI.createVirtualRegister(TRC);
8961  if (Subtarget->useMovt(*MF)) {
8962  unsigned Vtmp = varEnd;
8963  if ((LoopSize & 0xFFFF0000) != 0)
8964  Vtmp = MRI.createVirtualRegister(TRC);
8965  BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp)
8966  .addImm(LoopSize & 0xFFFF)
8967  .add(predOps(ARMCC::AL));
8968 
8969  if ((LoopSize & 0xFFFF0000) != 0)
8970  BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd)
8971  .addReg(Vtmp)
8972  .addImm(LoopSize >> 16)
8973  .add(predOps(ARMCC::AL));
8974  } else {
8975  MachineConstantPool *ConstantPool = MF->getConstantPool();
8976  Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
8977  const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
8978 
8979  // MachineConstantPool wants an explicit alignment.
8980  unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
8981  if (Align == 0)
8982  Align = MF->getDataLayout().getTypeAllocSize(C->getType());
8983  unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
8984 
8985  if (IsThumb)
8986  BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
8987  .addReg(varEnd, RegState::Define)
8988  .addConstantPoolIndex(Idx)
8989  .add(predOps(ARMCC::AL));
8990  else
8991  BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
8992  .addReg(varEnd, RegState::Define)
8993  .addConstantPoolIndex(Idx)
8994  .addImm(0)
8995  .add(predOps(ARMCC::AL));
8996  }
8997  BB->addSuccessor(loopMBB);
8998 
8999  // Generate the loop body:
9000  // varPhi = PHI(varLoop, varEnd)
9001  // srcPhi = PHI(srcLoop, src)
9002  // destPhi = PHI(destLoop, dst)
9003  MachineBasicBlock *entryBB = BB;
9004  BB = loopMBB;
9005  unsigned varLoop = MRI.createVirtualRegister(TRC);
9006  unsigned varPhi = MRI.createVirtualRegister(TRC);
9007  unsigned srcLoop = MRI.createVirtualRegister(TRC);
9008  unsigned srcPhi = MRI.createVirtualRegister(TRC);
9009  unsigned destLoop = MRI.createVirtualRegister(TRC);
9010  unsigned destPhi = MRI.createVirtualRegister(TRC);
9011 
9012  BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
9013  .addReg(varLoop).addMBB(loopMBB)
9014  .addReg(varEnd).addMBB(entryBB);
9015  BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
9016  .addReg(srcLoop).addMBB(loopMBB)
9017  .addReg(src).addMBB(entryBB);
9018  BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
9019  .addReg(destLoop).addMBB(loopMBB)
9020  .addReg(dest).addMBB(entryBB);
9021 
9022  // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
9023  // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
9024  unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
9025  emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
9026  IsThumb1, IsThumb2);
9027  emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
9028  IsThumb1, IsThumb2);
9029 
9030  // Decrement loop variable by UnitSize.
9031  if (IsThumb1) {
9032  BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
9033  .add(t1CondCodeOp())
9034  .addReg(varPhi)
9035  .addImm(UnitSize)
9036  .add(predOps(ARMCC::AL));
9037  } else {
9038  MachineInstrBuilder MIB =
9039  BuildMI(*BB, BB->end(), dl,
9040  TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
9041  MIB.addReg(varPhi)
9042  .addImm(UnitSize)
9043  .add(predOps(ARMCC::AL))
9044  .add(condCodeOp());
9045  MIB->getOperand(5).setReg(ARM::CPSR);
9046  MIB->getOperand(5).setIsDef(true);
9047  }
9048  BuildMI(*BB, BB->end(), dl,
9049  TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
9050  .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
9051 
9052  // loopMBB can loop back to loopMBB or fall through to exitMBB.
9053  BB->addSuccessor(loopMBB);
9054  BB->addSuccessor(exitMBB);
9055 
9056  // Add epilogue to handle BytesLeft.
9057  BB = exitMBB;
9058  auto StartOfExit = exitMBB->begin();
9059 
9060  // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
9061  // [destOut] = STRB_POST(scratch, destLoop, 1)
9062  unsigned srcIn = srcLoop;
9063  unsigned destIn = destLoop;
9064  for (unsigned i = 0; i < BytesLeft; i++) {
9065  unsigned srcOut = MRI.createVirtualRegister(TRC);
9066  unsigned destOut = MRI.createVirtualRegister(TRC);
9067  unsigned scratch = MRI.createVirtualRegister(TRC);
9068  emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
9069  IsThumb1, IsThumb2);
9070  emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
9071  IsThumb1, IsThumb2);
9072  srcIn = srcOut;
9073  destIn = destOut;
9074  }
9075 
9076  MI.eraseFromParent(); // The instruction is gone now.
9077  return BB;
9078 }
9079 
9080 MachineBasicBlock *
9081 ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
9082  MachineBasicBlock *MBB) const {
9083  const TargetMachine &TM = getTargetMachine();
9084  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
9085  DebugLoc DL = MI.getDebugLoc();
9086 
9087  assert(Subtarget->isTargetWindows() &&
9088  "__chkstk is only supported on Windows");
9089  assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
9090 
9091  // __chkstk takes the number of words to allocate on the stack in R4, and
9092  // returns the stack adjustment in number of bytes in R4. This will not
9093  // clobber any other registers (other than the obvious lr).
9094  //
9095  // Although, technically, IP should be considered a register which may be
9096  // clobbered, the call itself will not touch it. Windows on ARM is a pure
9097  // thumb-2 environment, so there is no interworking required. As a result, we
9098  // do not expect a veneer to be emitted by the linker, clobbering IP.
9099  //
9100  // Each module receives its own copy of __chkstk, so no import thunk is
9101  // required, again, ensuring that IP is not clobbered.
9102  //
9103  // Finally, although some linkers may theoretically provide a trampoline for
9104  // out of range calls (which is quite common due to a 32M range limitation of
9105  // branches for Thumb), we can generate the long-call version via
9106  // -mcmodel=large, alleviating the need for the trampoline which may clobber
9107  // IP.
9108 
9109  switch (TM.getCodeModel()) {
9110  case CodeModel::Tiny:
9111  llvm_unreachable("Tiny code model not available on ARM.");
9112  case CodeModel::Small:
9113  case CodeModel::Medium:
9114  case CodeModel::Kernel:
9115  BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
9116  .add(predOps(ARMCC::AL))
9117  .addExternalSymbol("__chkstk")
9118  .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
9119  .addReg(ARM::R4, RegState::Implicit | RegState::Define)
9120  .addReg(ARM::R12,
9121  RegState::Implicit | RegState::Define | RegState::Dead)
9122  .addReg(ARM::CPSR,
9123  RegState::Implicit | RegState::Define | RegState::Dead);
9124  break;
9125  case CodeModel::Large: {
9126  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
9127  unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
9128 
9129  BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
9130  .addExternalSymbol("__chkstk");
9131  BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
9132  .add(predOps(ARMCC::AL))
9133  .addReg(Reg, RegState::Kill)
9134  .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
9135  .addReg(ARM::R4, RegState::Implicit | RegState::Define)
9136  .addReg(ARM::R12,
9137  RegState::Implicit | RegState::Define | RegState::Dead)
9138  .addReg(ARM::CPSR,
9139  RegState::Implicit | RegState::Define | RegState::Dead);
9140  break;
9141  }
9142  }
9143 
9144  BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
9145  .addReg(ARM::SP, RegState::Kill)
9146  .addReg(ARM::R4, RegState::Kill)
9147  .setMIFlags(MachineInstr::NoFlags)
9148  .add(predOps(ARMCC::AL))
9149  .add(condCodeOp());
9150 
9151  MI.eraseFromParent();
9152  return MBB;
9153 }
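// Editor's note (not part of the LLVM source): the sequence emitted above
// therefore boils down to
//
//   bl    __chkstk      ; r4: words to allocate in, byte adjustment out
//   sub.w sp, sp, r4    ; commit the probed allocation
//
// with the large code model differing only in materializing the callee
// address via movw/movt (t2MOVi32imm) and calling through blx.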
9154 
9155 MachineBasicBlock *
9156 ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
9157  MachineBasicBlock *MBB) const {
9158  DebugLoc DL = MI.getDebugLoc();
9159  MachineFunction *MF = MBB->getParent();
9160  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
9161 
9162  MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
9163  MF->insert(++MBB->getIterator(), ContBB);
9164  ContBB->splice(ContBB->begin(), MBB,
9165  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
9166  ContBB->transferSuccessorsAndUpdatePHIs(MBB);
9167  MBB->addSuccessor(ContBB);
9168 
9169  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
9170  BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
9171  MF->push_back(TrapBB);
9172  MBB->addSuccessor(TrapBB);
9173 
9174  BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
9175  .addReg(MI.getOperand(0).getReg())
9176  .addImm(0)
9177  .add(predOps(ARMCC::AL));
9178  BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
9179  .addMBB(TrapBB)
9180  .addImm(ARMCC::EQ)
9181  .addReg(ARM::CPSR);
9182 
9183  MI.eraseFromParent();
9184  return ContBB;
9185 }
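// Editor's note (not part of the LLVM source): the blocks built above form
// a two-way split on the divisor,
//
//   cmp  rN, #0
//   beq  TrapBB         ; __brkdiv0 raises the Windows division trap
//   ...                 ; ContBB: fall through to the actual division
//
// so the division helper can only be reached with a non-zero denominator.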
9186 
9187 // The CPSR operand of SelectItr might be missing a kill marker
9188 // because there were multiple uses of CPSR, and ISel didn't know
9189 // which to mark. Figure out whether SelectItr should have had a
9190 // kill marker, and set it if it should. Returns the correct kill
9191 // marker value.
9192 static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
9193  MachineBasicBlock* BB,
9194  const TargetRegisterInfo* TRI) {
9195  // Scan forward through BB for a use/def of CPSR.
9196  MachineBasicBlock::iterator miI(std::next(SelectItr));
9197  for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
9198  const MachineInstr& mi = *miI;
9199  if (mi.readsRegister(ARM::CPSR))
9200  return false;
9201  if (mi.definesRegister(ARM::CPSR))
9202  break; // Should have kill-flag - update below.
9203  }
9204 
9205  // If we hit the end of the block, check whether CPSR is live into a
9206  // successor.
9207  if (miI == BB->end()) {
9208  for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
9209  sEnd = BB->succ_end();
9210  sItr != sEnd; ++sItr) {
9211  MachineBasicBlock* succ = *sItr;
9212  if (succ->isLiveIn(ARM::CPSR))
9213  return false;
9214  }
9215  }
9216 
9217  // We found a def, or hit the end of the basic block and CPSR wasn't live
9218  // out. SelectMI should have a kill flag on CPSR.
9219  SelectItr->addRegisterKilled(ARM::CPSR, TRI);
9220  return true;
9221 }
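// Editor's note (not part of the LLVM source): the scan above is a small
// local liveness check: a later read of CPSR, or a successor block with
// CPSR live-in, proves the flags are still needed; only when neither exists
// may the select be marked as killing CPSR, sparing the copy/sink blocks
// from carrying it as a live-in.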
9222 
9223 MachineBasicBlock *
9224 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
9225  MachineBasicBlock *BB) const {
9226  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
9227  DebugLoc dl = MI.getDebugLoc();
9228  bool isThumb2 = Subtarget->isThumb2();
9229  switch (MI.getOpcode()) {
9230  default: {
9231  MI.print(errs());
9232  llvm_unreachable("Unexpected instr type to insert");
9233  }
9234 
9235  // Thumb1 post-indexed loads are really just single-register LDMs.
9236  case ARM::tLDR_postidx: {
9237  MachineOperand Def(MI.getOperand(1));
9238  BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
9239  .add(Def) // Rn_wb
9240  .add(MI.getOperand(2)) // Rn
9241  .add(MI.getOperand(3)) // PredImm
9242  .add(MI.getOperand(4)) // PredReg
9243  .add(MI.getOperand(0)); // Rt
9244  MI.eraseFromParent();
9245  return BB;
9246  }
9247 
9248  // The Thumb2 pre-indexed stores have the same MI operands, they just
9249  // define them differently in the .td files from the isel patterns, so
9250  // they need pseudos.
9251  case ARM::t2STR_preidx:
9252  MI.setDesc(TII->get(ARM::t2STR_PRE));
9253  return BB;
9254  case ARM::t2STRB_preidx:
9255  MI.setDesc(TII->get(ARM::t2STRB_PRE));
9256  return BB;
9257  case ARM::t2STRH_preidx:
9258  MI.setDesc(TII->get(ARM::t2STRH_PRE));
9259  return BB;
9260 
9261  case ARM::STRi_preidx:
9262  case ARM::STRBi_preidx: {
9263  unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
9264  : ARM::STRB_PRE_IMM;
9265  // Decode the offset.
9266  unsigned Offset = MI.getOperand(4).getImm();
9267  bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
9268  Offset = ARM_AM::getAM2Offset(Offset);
9269  if (isSub)
9270  Offset = -Offset;
9271 
9272  MachineMemOperand *MMO = *MI.memoperands_begin();
9273  BuildMI(*BB, MI, dl, TII->get(NewOpc))
9274  .add(MI.getOperand(0)) // Rn_wb
9275  .add(MI.getOperand(1)) // Rt
9276  .add(MI.getOperand(2)) // Rn
9277  .addImm(Offset) // offset (skip GPR==zero_reg)
9278  .add(MI.getOperand(5)) // pred
9279  .add(MI.getOperand(6))
9280  .addMemOperand(MMO);
9281  MI.eraseFromParent();
9282  return BB;
9283  }
9284  case ARM::STRr_preidx:
9285  case ARM::STRBr_preidx:
9286  case ARM::STRH_preidx: {
9287  unsigned NewOpc;
9288  switch (MI.getOpcode()) {
9289  default: llvm_unreachable("unexpected opcode!");
9290  case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
9291  case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
9292  case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
9293  }
9294  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
9295  for (unsigned i = 0; i < MI.getNumOperands(); ++i)
9296  MIB.add(MI.getOperand(i));
9297  MI.eraseFromParent();
9298  return BB;
9299  }
9300 
9301  case ARM::tMOVCCr_pseudo: {
9302  // To "insert" a SELECT_CC instruction, we actually have to insert the
9303  // diamond control-flow pattern. The incoming instruction knows the
9304  // destination vreg to set, the condition code register to branch on, the
9305  // true/false values to select between, and a branch opcode to use.
9306  const BasicBlock *LLVM_BB = BB->getBasicBlock();
9307  MachineFunction::iterator It = ++BB->getIterator();
9308 
9309  // thisMBB:
9310  // ...
9311  // TrueVal = ...
9312  // cmpTY ccX, r1, r2
9313  // bCC copy1MBB
9314  // fallthrough --> copy0MBB
9315  MachineBasicBlock *thisMBB = BB;
9316  MachineFunction *F = BB->getParent();
9317  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
9318  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9319  F->insert(It, copy0MBB);
9320  F->insert(It, sinkMBB);
9321 
9322  // Check whether CPSR is live past the tMOVCCr_pseudo.
9323  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
9324  if (!MI.killsRegister(ARM::CPSR) &&
9325  !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
9326  copy0MBB->addLiveIn(ARM::CPSR);
9327  sinkMBB->addLiveIn(ARM::CPSR);
9328  }
9329 
9330  // Transfer the remainder of BB and its successor edges to sinkMBB.
9331  sinkMBB->splice(sinkMBB->begin(), BB,
9332  std::next(MachineBasicBlock::iterator(MI)), BB->end());
9333  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9334 
9335  BB->addSuccessor(copy0MBB);
9336  BB->addSuccessor(sinkMBB);
9337 
9338  BuildMI(BB, dl, TII->get(ARM::tBcc))
9339  .addMBB(sinkMBB)
9340  .addImm(MI.getOperand(3).getImm())
9341  .addReg(MI.getOperand(4).getReg());
9342 
9343  // copy0MBB:
9344  // %FalseValue = ...
9345  // # fallthrough to sinkMBB
9346  BB = copy0MBB;
9347 
9348  // Update machine-CFG edges
9349  BB->addSuccessor(sinkMBB);
9350 
9351  // sinkMBB:
9352  // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
9353  // ...
9354  BB = sinkMBB;
9355  BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
9356  .addReg(MI.getOperand(1).getReg())
9357  .addMBB(copy0MBB)
9358  .addReg(MI.getOperand(2).getReg())
9359  .addMBB(thisMBB);
9360 
9361  MI.eraseFromParent(); // The pseudo instruction is gone now.
9362  return BB;
9363  }
9364 
9365  case ARM::BCCi64:
9366  case ARM::BCCZi64: {
9367  // If there is an unconditional branch to the other successor, remove it.
9368  BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
9369 
9370  // Compare both parts that make up the double comparison separately for
9371  // equality.
9372  bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
9373 
9374  unsigned LHS1 = MI.getOperand(1).getReg();
9375  unsigned LHS2 = MI.getOperand(2).getReg();
9376  if (RHSisZero) {
9377  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
9378  .addReg(LHS1)
9379  .addImm(0)
9380  .add(predOps(ARMCC::AL));
9381  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
9382  .addReg(LHS2).addImm(0)
9383  .addImm(ARMCC::EQ).addReg(ARM::CPSR);
9384  } else {
9385  unsigned RHS1 = MI.getOperand(3).getReg();
9386  unsigned RHS2 = MI.getOperand(4).getReg();
9387  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
9388  .addReg(LHS1)
9389  .addReg(RHS1)
9390  .add(predOps(ARMCC::AL));
9391  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
9392  .addReg(LHS2).addReg(RHS2)
9393  .addImm(ARMCC::EQ).addReg(ARM::CPSR);
9394  }
9395 
9396  MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
9397  MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
9398  if (MI.getOperand(0).getImm() == ARMCC::NE)
9399  std::swap(destMBB, exitMBB);
9400 
9401  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
9402  .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
9403  if (isThumb2)
9404  BuildMI(BB, dl, TII->get(ARM::t2B))
9405  .addMBB(exitMBB)
9406  .add(predOps(ARMCC::AL));
9407  else
9408  BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
9409 
9410  MI.eraseFromParent(); // The pseudo instruction is gone now.
9411  return BB;
9412  }
9413 
9414  case ARM::Int_eh_sjlj_setjmp:
9415  case ARM::Int_eh_sjlj_setjmp_nofp:
9416  case ARM::tInt_eh_sjlj_setjmp:
9417  case ARM::t2Int_eh_sjlj_setjmp:
9418  case ARM::t2Int_eh_sjlj_setjmp_nofp:
9419  return BB;
9420 
9421  case ARM::Int_eh_sjlj_setup_dispatch:
9422  EmitSjLjDispatchBlock(MI, BB);
9423  return BB;
9424 
9425  case ARM::ABS:
9426  case ARM::t2ABS: {
9427  // To insert an ABS instruction, we have to insert the
9428  // diamond control-flow pattern. The incoming instruction knows the
9429  // source vreg to test against 0, the destination vreg to set,
9430  // the condition code register to branch on, the
9431  // true/false values to select between, and a branch opcode to use.
9432  // It transforms
9433  // V1 = ABS V0
9434  // into
9435  // V2 = MOVS V0
9436  // BCC (branch to SinkBB if V0 >= 0)
9437  // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
9438  // SinkBB: V1 = PHI(V2, V3)
9439  const BasicBlock *LLVM_BB = BB->getBasicBlock();
9440  MachineFunction::iterator BBI = ++BB->getIterator();
9441  MachineFunction *Fn = BB->getParent();
9442  MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
9443  MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
9444  Fn->insert(BBI, RSBBB);
9445  Fn->insert(BBI, SinkBB);
9446 
9447  unsigned int ABSSrcReg = MI.getOperand(1).getReg();
9448  unsigned int ABSDstReg = MI.getOperand(0).getReg();
9449  bool ABSSrcKill = MI.getOperand(1).isKill();
9450  bool isThumb2 = Subtarget->isThumb2();
9451  MachineRegisterInfo &MRI = Fn->getRegInfo();
9452  // In Thumb mode, S must not be specified if the source register is the SP or
9453  // PC, or if the destination register is the SP, so restrict the register class.
9454  unsigned NewRsbDstReg =
9455  MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
9456 
9457  // Transfer the remainder of BB and its successor edges to SinkBB.
9458  SinkBB->splice(SinkBB->begin(), BB,
9459  std::next(MachineBasicBlock::iterator(MI)), BB->end());
9460  SinkBB->transferSuccessorsAndUpdatePHIs(BB);
9461 
9462  BB->addSuccessor(RSBBB);
9463  BB->addSuccessor(SinkBB);
9464 
9465  // fall through to SinkBB
9466  RSBBB->addSuccessor(SinkBB);
9467 
9468  // insert a cmp at the end of BB
9469  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
9470  .addReg(ABSSrcReg)
9471  .addImm(0)
9472  .add(predOps(ARMCC::AL));
9473 
9474  // insert a bcc with opposite CC to ARMCC::MI at the end of BB
9475  BuildMI(BB, dl,
9476  TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
9477  .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
9478 
9479  // insert rsbri in RSBBB
9480  // Note: BCC and rsbri will be converted into predicated rsbmi
9481  // by if-conversion pass
9482  BuildMI(*RSBBB, RSBBB->begin(), dl,
9483  TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
9484  .addReg(ABSSrcReg, ABSSrcKill ? RegState::Kill : 0)
9485  .addImm(0)
9486  .add(predOps(ARMCC::AL))
9487  .add(condCodeOp());
9488 
9489  // insert PHI in SinkBB,
9490  // reuse ABSDstReg to not change uses of ABS instruction
9491  BuildMI(*SinkBB, SinkBB->begin(), dl,
9492  TII->get(ARM::PHI), ABSDstReg)
9493  .addReg(NewRsbDstReg).addMBB(RSBBB)
9494  .addReg(ABSSrcReg).addMBB(BB);
9495 
9496  // remove ABS instruction
9497  MI.eraseFromParent();
9498 
9499  // return last added BB
9500  return SinkBB;
9501  }
9502  case ARM::COPY_STRUCT_BYVAL_I32:
9503  ++NumLoopByVals;
9504  return EmitStructByval(MI, BB);
9505  case ARM::WIN__CHKSTK:
9506  return EmitLowered__chkstk(MI, BB);
9507  case ARM::WIN__DBZCHK:
9508  return EmitLowered__dbzchk(MI, BB);
9509  }
9510 }
9511 
9512 /// Attaches vregs to MEMCPY that it will use as scratch registers
9513 /// when it is expanded into LDM/STM. This is done as a post-isel lowering
9514 /// instead of as a custom inserter because we need the use list from the SDNode.
9515 static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
9516  MachineInstr &MI, const SDNode *Node) {
9517  bool isThumb1 = Subtarget->isThumb1Only();
9518 
9519  DebugLoc DL = MI.getDebugLoc();
9520  MachineFunction *MF = MI.getParent()->getParent();
9521  MachineRegisterInfo &MRI = MF->getRegInfo();
9522  MachineInstrBuilder MIB(*MF, MI);
9523 
9524  // If the new dst/src is unused, mark it as dead.
9525  if (!Node->hasAnyUseOfValue(0)) {
9526  MI.getOperand(0).setIsDead(true);
9527  }
9528  if (!Node->hasAnyUseOfValue(1)) {
9529  MI.getOperand(1).setIsDead(true);
9530  }
9531 
9532  // The MEMCPY both defines and kills the scratch registers.
9533  for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
9534  unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
9535  : &ARM::GPRRegClass);
9536  MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
9537  }
9538 }
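// A hedged sketch of this hook's effect (the operand layout shown is
// illustrative only): if a MEMCPY pseudo carries the scratch count 3 in
// operand 4, it gains three fresh dead-def scratch vregs,
//   %dst, %src = MEMCPY %d, %s, %n, 3
//     --> %dst, %src = MEMCPY %d, %s, %n, 3, def dead %t0, def dead %t1,
//                      def dead %t2
// so the later LDM/STM expansion has registers to burst the copy through.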
9539 
9540 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
9541  SDNode *Node) const {
9542  if (MI.getOpcode() == ARM::MEMCPY) {
9543  attachMEMCPYScratchRegs(Subtarget, MI, Node);
9544  return;
9545  }
9546 
9547  const MCInstrDesc *MCID = &MI.getDesc();
9548  // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
9549  // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
9550  // operand is still set to noreg. If needed, set the optional operand's
9551  // register to CPSR, and remove the redundant implicit def.
9552  //
9553  // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
9554 
9555  // Rename pseudo opcodes.
9556  unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
9557  unsigned ccOutIdx;
9558  if (NewOpc) {
9559  const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
9560  MCID = &TII->get(NewOpc);
9561 
9562  assert(MCID->getNumOperands() ==
9563  MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
9564  && "converted opcode should be the same except for cc_out"
9565  " (and, on Thumb1, pred)");
9566 
9567  MI.setDesc(*MCID);
9568 
9569  // Add the optional cc_out operand
9570  MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
9571 
9572  // On Thumb1, move all input operands to the end, then add the predicate
9573  if (Subtarget->isThumb1Only()) {
9574  for (unsigned c = MCID->getNumOperands() - 4; c--;) {
9575  MI.addOperand(MI.getOperand(1));
9576  MI.RemoveOperand(1);
9577  }
9578 
9579  // Restore the ties
9580  for (unsigned i = MI.getNumOperands(); i--;) {
9581  const MachineOperand& op = MI.getOperand(i);
9582  if (op.isReg() && op.isUse()) {
9583  int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
9584  if (DefIdx != -1)
9585  MI.tieOperands(DefIdx, i);
9586  }
9587  }
9588 
9589  MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
9590  MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
9591  ccOutIdx = 1;
9592  } else
9593  ccOutIdx = MCID->getNumOperands() - 1;
9594  } else
9595  ccOutIdx = MCID->getNumOperands() - 1;
9596 
9597  // Any ARM instruction that sets the 's' bit should specify an optional
9598  // "cc_out" operand in the last operand position.
9599  if (!MI.hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
9600  assert(!NewOpc && "Optional cc_out operand required");
9601  return;
9602  }
9603  // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
9604  // since we already have an optional CPSR def.
9605  bool definesCPSR = false;
9606  bool deadCPSR = false;
9607  for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
9608  ++i) {
9609  const MachineOperand &MO = MI.getOperand(i);
9610  if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
9611  definesCPSR = true;
9612  if (MO.isDead())
9613  deadCPSR = true;
9614  MI.RemoveOperand(i);
9615  break;
9616  }
9617  }
9618  if (!definesCPSR) {
9619  assert(!NewOpc && "Optional cc_out operand required");
9620  return;
9621  }
9622  assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
9623  if (deadCPSR) {
9624  assert(!MI.getOperand(ccOutIdx).getReg() &&
9625  "expect uninitialized optional cc_out operand");
9626  // Thumb1 instructions must have the S bit even if the CPSR is dead.
9627  if (!Subtarget->isThumb1Only())
9628  return;
9629  }
9630 
9631  // If this instruction was defined with an optional CPSR def and its dag node
9632  // had a live implicit CPSR def, then activate the optional CPSR def.
9633  MachineOperand &MO = MI.getOperand(ccOutIdx);
9634  MO.setReg(ARM::CPSR);
9635  MO.setIsDef(true);
9636 }
9637 
9638 //===----------------------------------------------------------------------===//
9639 // ARM Optimization Hooks
9640 //===----------------------------------------------------------------------===//
9641 
9642 // Helper function that checks if N is a null or all ones constant.
9643 static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
9644  return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
9645 }
9646 
9647 // Return true if N is conditionally 0 or all ones.
9648 // Detects these expressions where cc is an i1 value:
9649 //
9650 // (select cc 0, y) [AllOnes=0]
9651 // (select cc y, 0) [AllOnes=0]
9652 // (zext cc) [AllOnes=0]
9653 // (sext cc) [AllOnes=0/1]
9654 // (select cc -1, y) [AllOnes=1]
9655 // (select cc y, -1) [AllOnes=1]
9656 //
9657 // Invert is set when N is the null/all ones constant when CC is false.
9658 // OtherOp is set to the alternative value of N.
9659 static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
9660  SDValue &CC, bool &Invert,
9661  SDValue &OtherOp,
9662  SelectionDAG &DAG) {
9663  switch (N->getOpcode()) {
9664  default: return false;
9665  case ISD::SELECT: {
9666  CC = N->getOperand(0);
9667  SDValue N1 = N->getOperand(1);
9668  SDValue N2 = N->getOperand(2);
9669  if (isZeroOrAllOnes(N1, AllOnes)) {
9670  Invert = false;
9671  OtherOp = N2;
9672  return true;
9673  }
9674  if (isZeroOrAllOnes(N2, AllOnes)) {
9675  Invert = true;
9676  OtherOp = N1;
9677  return true;
9678  }
9679  return false;
9680  }
9681  case ISD::ZERO_EXTEND:
9682  // (zext cc) can never be the all ones value.
9683  if (AllOnes)
9684  return false;
9685  LLVM_FALLTHROUGH;
9686  case ISD::SIGN_EXTEND: {
9687  SDLoc dl(N);
9688  EVT VT = N->getValueType(0);
9689  CC = N->getOperand(0);
9690  if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
9691  return false;
9692  Invert = !AllOnes;
9693  if (AllOnes)
9694  // When looking for an AllOnes constant, N is an sext, and the 'other'
9695  // value is 0.
9696  OtherOp = DAG.getConstant(0, dl, VT);
9697  else if (N->getOpcode() == ISD::ZERO_EXTEND)
9698  // When looking for a 0 constant, N can be zext or sext.
9699  OtherOp = DAG.getConstant(1, dl, VT);
9700  else
9701  OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
9702  VT);
9703  return true;
9704  }
9705  }
9706 }
9707 
9708 // Combine a constant select operand into its use:
9709 //
9710 // (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
9711 // (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
9712 // (and (select cc, -1, c), x) -> (select cc, x, (and x, c)) [AllOnes=1]
9713 // (or (select cc, 0, c), x) -> (select cc, x, (or x, c))
9714 // (xor (select cc, 0, c), x) -> (select cc, x, (xor x, c))
9715 //
9716 // The transform is rejected if the select doesn't have a constant operand that
9717 // is null, or all ones when AllOnes is set.
9718 //
9719 // Also recognize sext/zext from i1:
9720 //
9721 // (add (zext cc), x) -> (select cc (add x, 1), x)
9722 // (add (sext cc), x) -> (select cc (add x, -1), x)
9723 //
9724 // These transformations eventually create predicated instructions.
9725 //
9726 // @param N The node to transform.
9727 // @param Slct The N operand that is a select.
9728 // @param OtherOp The other N operand (x above).
9729 // @param DCI Context.
9730 // @param AllOnes Require the select constant to be all ones instead of null.
9731 // @returns The new node, or SDValue() on failure.
9732 static
9733 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
9734  TargetLowering::DAGCombinerInfo &DCI,
9735  bool AllOnes = false) {
9736  SelectionDAG &DAG = DCI.DAG;
9737  EVT VT = N->getValueType(0);
9738  SDValue NonConstantVal;
9739  SDValue CCOp;
9740  bool SwapSelectOps;
9741  if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
9742  NonConstantVal, DAG))
9743  return SDValue();
9744 
9745  // Slct is now known to be the desired identity constant when CC is true.
9746  SDValue TrueVal = OtherOp;
9747  SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
9748  OtherOp, NonConstantVal);
9749  // Unless SwapSelectOps says CC should be false.
9750  if (SwapSelectOps)
9751  std::swap(TrueVal, FalseVal);
9752 
9753  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
9754  CCOp, TrueVal, FalseVal);
9755 }
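// A hedged sketch of the fold above (concrete values chosen for
// illustration): with an i1 condition %cc and i32 values,
//   (add (zext %cc), %x)           --> (select %cc, (add %x, 1), %x)
//   (and (select %cc, -1, 7), %x)  --> (select %cc, %x, (and %x, 7))
// so that if-conversion can later turn the select into an add or and that
// executes only when the condition holds.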
9756 
9757 // Attempt combineSelectAndUse on each operand of a commutative operator N.
9758 static
9759 SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
9760  TargetLowering::DAGCombinerInfo &DCI) {
9761  SDValue N0 = N->getOperand(0);
9762  SDValue N1 = N->getOperand(1);
9763  if (N0.getNode()->hasOneUse())
9764  if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
9765  return Result;
9766  if (N1.getNode()->hasOneUse())
9767  if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
9768  return Result;
9769  return SDValue();
9770 }
9771 
9772 static bool IsVUZPShuffleNode(SDNode *N) {
9773  // VUZP shuffle node.
9774  if (N->getOpcode() == ARMISD::VUZP)
9775  return true;
9776 
9777  // "VUZP" on i32 is an alias for VTRN.
9778  if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
9779  return true;
9780 
9781  return false;
9782 }
9783 
9784 static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
9785  TargetLowering::DAGCombinerInfo &DCI,
9786  const ARMSubtarget *Subtarget) {
9787  // Look for ADD(VUZP.0, VUZP.1).
9788  if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
9789  N0 == N1)
9790  return SDValue();
9791 
9792  // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
9793  if (!N->getValueType(0).is64BitVector())
9794  return SDValue();
9795 
9796  // Generate vpadd.
9797  SelectionDAG &DAG = DCI.DAG;
9798  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9799  SDLoc dl(N);
9800  SDNode *Unzip = N0.getNode();
9801  EVT VT = N->getValueType(0);
9802 
9803  SmallVector<SDValue, 8> Ops;
9804  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
9805  TLI.getPointerTy(DAG.getDataLayout())));
9806  Ops.push_back(Unzip->getOperand(0));
9807  Ops.push_back(Unzip->getOperand(1));
9808 
9809  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
9810 }
9811 
9812 static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
9813  TargetLowering::DAGCombinerInfo &DCI,
9814  const ARMSubtarget *Subtarget) {
9815  // Check for two extended operands.
9816  if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
9817  N1.getOpcode() == ISD::SIGN_EXTEND) &&
9818  !(N0.getOpcode() == ISD::ZERO_EXTEND &&
9819  N1.getOpcode() == ISD::ZERO_EXTEND))
9820  return SDValue();
9821 
9822  SDValue N00 = N0.getOperand(0);
9823  SDValue N10 = N1.getOperand(0);
9824 
9825  // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
9826  if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
9827  N00 == N10)
9828  return SDValue();
9829 
9830  // We only recognize Q register paddl here; this can't be reached until
9831  // after type legalization.
9832  if (!N00.getValueType().is64BitVector() ||
9833  !N0.getValueType().is128BitVector())
9834  return SDValue();
9835 
9836  // Generate vpaddl.
9837  SelectionDAG &DAG = DCI.DAG;
9838  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9839  SDLoc dl(N);
9840  EVT VT = N->getValueType(0);
9841 
9842  SmallVector<SDValue, 8> Ops;
9843  // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
9844  unsigned Opcode;
9845  if (N0.getOpcode() == ISD::SIGN_EXTEND)
9846  Opcode = Intrinsic::arm_neon_vpaddls;
9847  else
9848  Opcode = Intrinsic::arm_neon_vpaddlu;
9849  Ops.push_back(DAG.getConstant(Opcode, dl,
9850  TLI.getPointerTy(DAG.getDataLayout())));
9851  EVT ElemTy = N00.getValueType().getVectorElementType();
9852  unsigned NumElts = VT.getVectorNumElements();
9853  EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
9854  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
9855  N00.getOperand(0), N00.getOperand(1));
9856  Ops.push_back(Concat);
9857 
9858  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
9859 }
9860 
9861 // FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
9862 // an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
9863 // much easier to match.
9864 static SDValue
9865 AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
9866  TargetLowering::DAGCombinerInfo &DCI,
9867  const ARMSubtarget *Subtarget) {
9868  // Only perform the optimization after legalization, and only if NEON is
9869  // available. We also expect both operands to be BUILD_VECTORs.
9870  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
9871  || N0.getOpcode() != ISD::BUILD_VECTOR
9872  || N1.getOpcode() != ISD::BUILD_VECTOR)
9873  return SDValue();
9874 
9875  // Check output type since VPADDL operand elements can only be 8, 16, or 32.
9876  EVT VT = N->getValueType(0);
9877  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
9878  return SDValue();
9879 
9880  // Check that the vector operands are of the right form.
9881  // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
9882  // operands, where N is the size of the formed vector.
9883  // Each EXTRACT_VECTOR should have the same input vector and odd or even
9884  // index such that we have a pairwise add pattern.
9885 
9886  // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
9887  if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
9888  return SDValue();
9889  SDValue Vec = N0->getOperand(0)->getOperand(0);
9890  SDNode *V = Vec.getNode();
9891  unsigned nextIndex = 0;
9892 
9893  // For each operand of the ADD that is a BUILD_VECTOR,
9894  // check to see if each of its operands is an EXTRACT_VECTOR with
9895  // the same vector and the appropriate index.
9896  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
9897  if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
9898  && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
9899 
9900  SDValue ExtVec0 = N0->getOperand(i);
9901  SDValue ExtVec1 = N1->getOperand(i);
9902 
9903  // First operand is the vector, verify it's the same.
9904  if (V != ExtVec0->getOperand(0).getNode() ||
9905  V != ExtVec1->getOperand(0).getNode())
9906  return SDValue();
9907 
9908  // Second is the constant, verify it's correct.
9909  ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
9910  ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
9911 
9912  // For the constant, we want to see all the even or all the odd.
9913  if (!C0 || !C1 || C0->getZExtValue() != nextIndex
9914  || C1->getZExtValue() != nextIndex+1)
9915  return SDValue();
9916 
9917  // Increment index.
9918  nextIndex+=2;
9919  } else
9920  return SDValue();
9921  }
9922 
9923  // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
9924  // we're using the entire input vector, otherwise there's a size/legality
9925  // mismatch somewhere.
9926  if (nextIndex != Vec.getValueType().getVectorNumElements() ||
9927  Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
9928  return SDValue();
9929 
9930  // Create VPADDL node.
9931  SelectionDAG &DAG = DCI.DAG;
9932  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9933 
9934  SDLoc dl(N);
9935 
9936  // Build operand list.
9937  SmallVector<SDValue, 8> Ops;
9938  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
9939  TLI.getPointerTy(DAG.getDataLayout())));
9940 
9941  // Input is the vector.
9942  Ops.push_back(Vec);
9943 
9944  // Get widened type and narrowed type.
9945  MVT widenType;
9946  unsigned numElem = VT.getVectorNumElements();
9947 
9948  EVT inputLaneType = Vec.getValueType().getVectorElementType();
9949  switch (inputLaneType.getSimpleVT().SimpleTy) {
9950  case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
9951  case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
9952  case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
9953  default:
9954  llvm_unreachable("Invalid vector element type for padd optimization.");
9955  }
9956 
9957  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
9958  unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
9959  return DAG.getNode(ExtOp, dl, VT, tmp);
9960 }
9961 
9962 static SDValue findMUL_LOHI(SDValue V) {
9963  if (V->getOpcode() == ISD::UMUL_LOHI ||
9964  V->getOpcode() == ISD::SMUL_LOHI)
9965  return V;
9966  return SDValue();
9967 }
9968 
9969 static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
9970  TargetLowering::DAGCombinerInfo &DCI,
9971  const ARMSubtarget *Subtarget) {
9972  if (Subtarget->isThumb()) {
9973  if (!Subtarget->hasDSP())
9974  return SDValue();
9975  } else if (!Subtarget->hasV5TEOps())
9976  return SDValue();
9977 
9978  // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
9979  // accumulates the product into a 64-bit value. The 16-bit values will
9980  // be sign extended somehow or SRA'd into 32-bit values
9981  // (addc (adde (mul 16bit, 16bit), lo), hi)
9982  SDValue Mul = AddcNode->getOperand(0);
9983  SDValue Lo = AddcNode->getOperand(1);
9984  if (Mul.getOpcode() != ISD::MUL) {
9985  Lo = AddcNode->getOperand(0);
9986  Mul = AddcNode->getOperand(1);
9987  if (Mul.getOpcode() != ISD::MUL)
9988  return SDValue();
9989  }
9990 
9991  SDValue SRA = AddeNode->getOperand(0);
9992  SDValue Hi = AddeNode->getOperand(1);
9993  if (SRA.getOpcode() != ISD::SRA) {
9994  SRA = AddeNode->getOperand(1);
9995  Hi = AddeNode->getOperand(0);
9996  if (SRA.getOpcode() != ISD::SRA)
9997  return SDValue();
9998  }
9999  if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
10000  if (Const->getZExtValue() != 31)
10001  return SDValue();
10002  } else
10003  return SDValue();
10004 
10005  if (SRA.getOperand(0) != Mul)
10006  return SDValue();
10007 
10008  SelectionDAG &DAG = DCI.DAG;
10009  SDLoc dl(AddcNode);
10010  unsigned Opcode = 0;
10011  SDValue Op0;
10012  SDValue Op1;
10013 
10014  if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
10015  Opcode = ARMISD::SMLALBB;
10016  Op0 = Mul.getOperand(0);
10017  Op1 = Mul.getOperand(1);
10018  } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
10019  Opcode = ARMISD::SMLALBT;
10020  Op0 = Mul.getOperand(0);
10021  Op1 = Mul.getOperand(1).getOperand(0);
10022  } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
10023  Opcode = ARMISD::SMLALTB;
10024  Op0 = Mul.getOperand(0).getOperand(0);
10025  Op1 = Mul.getOperand(1);
10026  } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
10027  Opcode = ARMISD::SMLALTT;
10028  Op0 = Mul->getOperand(0).getOperand(0);
10029  Op1 = Mul->getOperand(1).getOperand(0);
10030  }
10031 
10032  if (!Op0 || !Op1)
10033  return SDValue();
10034 
10035  SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
10036  Op0, Op1, Lo, Hi);
10037  // Replace the ADDs' nodes uses by the MLA node's values.
10038  SDValue HiMLALResult(SMLAL.getNode(), 1);
10039  SDValue LoMLALResult(SMLAL.getNode(), 0);
10040 
10041  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
10042  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
10043 
10044  // Return original node to notify the driver to stop replacing.
10045  SDValue resNode(AddcNode, 0);
10046  return resNode;
10047 }
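// A hedged sketch of source that can reach this combine (assumes a
// subtarget with the DSP extensions; names are illustrative):
//   long long mac16(long long acc, short a, short b) {
//     return acc + a * b;   // 16x16->32 multiply, 64-bit accumulate
//   }
// The addc/adde pair over the sign-extended product can then be selected to
// a single 'smlalbb' (or the bt/tb/tt variants when an operand is an
// asr #16 of a 32-bit value).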
10048 
10049 static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
10050  TargetLowering::DAGCombinerInfo &DCI,
10051  const ARMSubtarget *Subtarget) {
10052  // Look for multiply add opportunities.
10053  // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
10054  // each add node consumes a value from ISD::UMUL_LOHI and there is
10055  // a glue link from the first add to the second add.
10056  // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
10057  // a S/UMLAL instruction.
10058  //                  UMUL_LOHI
10059  //                 / :lo    \ :hi
10060  //                V          \          [no multiline comment]
10061  //    loAdd ->  ADDC         |
10062  //                 \ :carry /
10063  //                  V      V
10064  //                    ADDE <- hiAdd
10065  //
10066  // In the special case where only the higher part of a signed result is used
10067  // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
10068  // a constant with the exact value of 0x80000000, we recognize we are dealing
10069  // with a "rounded multiply and add" (or subtract) and transform it into
10070  // either a ARMISD::SMMLAR or ARMISD::SMMLSR respectively.
10071 
10072  assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
10073  AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
10074  "Expect an ADDE or SUBE");
10075 
10076  assert(AddeSubeNode->getNumOperands() == 3 &&
10077  AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
10078  "ADDE node has the wrong inputs");
10079 
10080  // Check that we are chained to the right ADDC or SUBC node.
10081  SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
10082  if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
10083  AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
10084  (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
10085  AddcSubcNode->getOpcode() != ARMISD::SUBC))
10086  return SDValue();
10087 
10088  SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
10089  SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
10090 
10091  // Check if the two operands are from the same mul_lohi node.
10092  if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
10093  return SDValue();
10094 
10095  assert(AddcSubcNode->getNumValues() == 2 &&
10096  AddcSubcNode->getValueType(0) == MVT::i32 &&
10097  "Expect ADDC with two result values. First: i32");
10098 
10099  // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
10100  // may be an SMLAL which multiplies two 16-bit values.
10101  if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
10102  AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
10103  AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
10104  AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
10105  AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
10106  return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
10107 
10108  // Check for the triangle shape.
10109  SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
10110  SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
10111 
10112  // Make sure that the ADDE/SUBE operands are not coming from the same node.
10113  if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
10114  return SDValue();
10115 
10116  // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
10117  bool IsLeftOperandMUL = false;
10118  SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
10119  if (MULOp == SDValue())
10120  MULOp = findMUL_LOHI(AddeSubeOp1);
10121  else
10122  IsLeftOperandMUL = true;
10123  if (MULOp == SDValue())
10124  return SDValue();
10125 
10126  // Figure out the right opcode.
10127  unsigned Opc = MULOp->getOpcode();
10128  unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
10129 
10130  // Figure out the high and low input values to the MLAL node.
10131  SDValue *HiAddSub = nullptr;
10132  SDValue *LoMul = nullptr;
10133  SDValue *LowAddSub = nullptr;
10134 
10135  // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
10136  if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
10137  return SDValue();
10138 
10139  if (IsLeftOperandMUL)
10140  HiAddSub = &AddeSubeOp1;
10141  else
10142  HiAddSub = &AddeSubeOp0;
10143 
10144  // Ensure that LoMul and LowAddSub are taken from correct ISD::SMUL_LOHI node
10145  // whose low result is fed to the ADDC/SUBC we are checking.
10146 
10147  if (AddcSubcOp0 == MULOp.getValue(0)) {
10148  LoMul = &AddcSubcOp0;
10149  LowAddSub = &AddcSubcOp1;
10150  }
10151  if (AddcSubcOp1 == MULOp.getValue(0)) {
10152  LoMul = &AddcSubcOp1;
10153  LowAddSub = &AddcSubcOp0;
10154  }
10155 
10156  if (!LoMul)
10157  return SDValue();
10158 
10159  // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
10160  // the replacement below will create a cycle.
10161  if (AddcSubcNode == HiAddSub->getNode() ||
10162  AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
10163  return SDValue();
10164 
10165  // Create the merged node.
10166  SelectionDAG &DAG = DCI.DAG;
10167 
10168  // Start building operand list.
10169  SmallVector<SDValue, 8> Ops;
10170  Ops.push_back(LoMul->getOperand(0));
10171  Ops.push_back(LoMul->getOperand(1));
10172 
10173  // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
10174  // the case, we must be doing signed multiplication and only use the higher
10175  // part of the result of the MLAL; furthermore, the LowAddSub must be a
10176  // constant addition or subtraction with the exact value of 0x80000000.
10177  if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
10178  FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
10179  LowAddSub->getNode()->getOpcode() == ISD::Constant &&
10180  static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
10181  0x80000000) {
10182  Ops.push_back(*HiAddSub);
10183  if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
10184  FinalOpc = ARMISD::SMMLSR;
10185  } else {
10186  FinalOpc = ARMISD::SMMLAR;
10187  }
10188  SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
10189  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
10190 
10191  return SDValue(AddeSubeNode, 0);
10192  } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
10193  // SMMLS is generated during instruction selection and the rest of this
10194  // function cannot handle the case where AddcSubcNode is a SUBC.
10195  return SDValue();
10196 
10197  // Finish building the operand list for {U/S}MLAL
10198  Ops.push_back(*LowAddSub);
10199  Ops.push_back(*HiAddSub);
10200 
10201  SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
10202  DAG.getVTList(MVT::i32, MVT::i32), Ops);
10203 
10204  // Replace the ADDs' nodes uses by the MLA node's values.
10205  SDValue HiMLALResult(MLALNode.getNode(), 1);
10206  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
10207 
10208  SDValue LoMLALResult(MLALNode.getNode(), 0);
10209  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
10210 
10211  // Return original node to notify the driver to stop replacing.
10212  return SDValue(AddeSubeNode, 0);
10213 }
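// A hedged sketch of the MLAL fold (illustrative only): 64-bit accumulation
// such as
//   unsigned long long umac(unsigned long long acc, unsigned a, unsigned b) {
//     return acc + (unsigned long long)a * b;
//   }
// typically produces the UMUL_LOHI/ADDC/ADDE triangle drawn above, which
// this function replaces with one 'umlal rLo, rHi, ra, rb' ('smlal' for the
// signed variant).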
10214 
10215 static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
10216  TargetLowering::DAGCombinerInfo &DCI,
10217  const ARMSubtarget *Subtarget) {
10218  // UMAAL is similar to UMLAL except that it adds two unsigned values.
10219  // While trying to combine for the other MLAL nodes, first search for the
10220  // chance to use UMAAL. Check if Addc uses a node which has already
10221  // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
10222  // as the addend, and it's handled in PerformUMLALCombine.
10223 
10224  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
10225  return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
10226 
10227  // Check that we have a glued ADDC node.
10228  SDNode* AddcNode = AddeNode->getOperand(2).getNode();
10229  if (AddcNode->getOpcode() != ARMISD::ADDC)
10230  return SDValue();
10231 
10232  // Find the converted UMAAL or quit if it doesn't exist.
10233  SDNode *UmlalNode = nullptr;
10234  SDValue AddHi;
10235  if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
10236  UmlalNode = AddcNode->getOperand(0).getNode();
10237  AddHi = AddcNode->getOperand(1);
10238  } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
10239  UmlalNode = AddcNode->getOperand(1).getNode();
10240  AddHi = AddcNode->getOperand(0);
10241  } else {
10242  return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
10243  }
10244 
10245  // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
10246  // the ADDC as well as Zero.
10247  if (!isNullConstant(UmlalNode->getOperand(3)))
10248  return SDValue();
10249 
10250  if ((isNullConstant(AddeNode->getOperand(0)) &&
10251  AddeNode->getOperand(1).getNode() == UmlalNode) ||
10252  (AddeNode->getOperand(0).getNode() == UmlalNode &&
10253  isNullConstant(AddeNode->getOperand(1)))) {
10254  SelectionDAG &DAG = DCI.DAG;
10255  SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
10256  UmlalNode->getOperand(2), AddHi };
10257  SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
10258  DAG.getVTList(MVT::i32, MVT::i32), Ops);
10259 
10260  // Replace the ADDs' nodes uses by the UMAAL node's values.
10261  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
10262  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
10263 
10264  // Return original node to notify the driver to stop replacing.
10265  return SDValue(AddeNode, 0);
10266  }
10267  return SDValue();
10268 }
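// A hedged sketch of the UMAAL pattern (requires ARMv6+ with DSP): the
// instruction computes RdHi:RdLo = Rn * Rm + RdHi + RdLo, all unsigned, so
//   unsigned long long f(unsigned a, unsigned b, unsigned c, unsigned d) {
//     return (unsigned long long)a * b + c + d;
//   }
// lets the UMLAL recognized earlier plus the extra addend collapse into a
// single 'umaal' here.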
10269 
10270 static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
10271  const ARMSubtarget *Subtarget) {
10272  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
10273  return SDValue();
10274 
10275  // Check that we have a pair of ADDC and ADDE as operands.
10276  // Both addends of the ADDE must be zero.
10277  SDNode* AddcNode = N->getOperand(2).getNode();
10278  SDNode* AddeNode = N->getOperand(3).getNode();
10279  if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
10280  (AddeNode->getOpcode() == ARMISD::ADDE) &&
10281  isNullConstant(AddeNode->getOperand(0)) &&
10282  isNullConstant(AddeNode->getOperand(1)) &&
10283  (AddeNode->getOperand(2).getNode() == AddcNode))
10284  return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
10285  DAG.getVTList(MVT::i32, MVT::i32),
10286  {N->getOperand(0), N->getOperand(1),
10287  AddcNode->getOperand(0), AddcNode->getOperand(1)});
10288  else
10289  return SDValue();
10290 }
10291 
10292 static SDValue PerformAddcSubcCombine(SDNode *N,
10293  TargetLowering::DAGCombinerInfo &DCI,
10294  const ARMSubtarget *Subtarget) {
10295  SelectionDAG &DAG(DCI.DAG);
10296 
10297  if (N->getOpcode() == ARMISD::SUBC) {
10298  // (SUBC (ADDE 0, 0, C), 1) -> C
10299  SDValue LHS = N->getOperand(0);
10300  SDValue RHS = N->getOperand(1);
10301  if (LHS->getOpcode() == ARMISD::ADDE &&
10302  isNullConstant(LHS->getOperand(0)) &&
10303  isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
10304  return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
10305  }
10306  }
10307 
10308  if (Subtarget->isThumb1Only()) {
10309  SDValue RHS = N->getOperand(1);
10310  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
10311  int32_t imm = C->getSExtValue();
10312  if (imm < 0 && imm > std::numeric_limits<int>::min()) {
10313  SDLoc DL(N);
10314  RHS = DAG.getConstant(-imm, DL, MVT::i32);
10315  unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
10316  : ARMISD::ADDC;
10317  return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
10318  }
10319  }
10320  }
10321 
10322  return SDValue();
10323 }
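// A hedged example of the Thumb1 rewrite above: since Thumb1 has no
// negative add immediates,
//   (ADDC %x, -5)  -->  (SUBC %x, 5)
// which lets 'subs' encode the constant directly instead of materializing
// -5 in a register first.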
10324 
10325 static SDValue PerformAddeSubeCombine(SDNode *N,
10326  TargetLowering::DAGCombinerInfo &DCI,
10327  const ARMSubtarget *Subtarget) {
10328  if (Subtarget->isThumb1Only()) {
10329  SelectionDAG &DAG = DCI.DAG;
10330  SDValue RHS = N->getOperand(1);
10331  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
10332  int64_t imm = C->getSExtValue();
10333  if (imm < 0) {
10334  SDLoc DL(N);
10335 
10336  // The with-carry-in form matches bitwise not instead of the negation.
10337  // Effectively, the inverse interpretation of the carry flag already
10338  // accounts for part of the negation.
10339  RHS = DAG.getConstant(~imm, DL, MVT::i32);
10340 
10341  unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
10342  : ARMISD::ADDE;
10343  return DAG.getNode(Opcode, DL, N->getVTList(),
10344  N->getOperand(0), RHS, N->getOperand(2));
10345  }
10346  }
10347  } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
10348  return AddCombineTo64bitMLAL(N, DCI, Subtarget);
10349  }
10350  return SDValue();
10351 }
10352 
10353 /// PerformADDECombine - Target-specific dag combine transform from
10354 /// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
10355 /// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
10356 static SDValue PerformADDECombine(SDNode *N,
10357  TargetLowering::DAGCombinerInfo &DCI,
10358  const ARMSubtarget *Subtarget) {
10359  // Only ARM and Thumb2 support UMLAL/SMLAL.
10360  if (Subtarget->isThumb1Only())
10361  return PerformAddeSubeCombine(N, DCI, Subtarget);
10362 
10363  // Only perform the checks after legalize when the pattern is available.
10364  if (DCI.isBeforeLegalize()) return SDValue();
10365 
10366  return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
10367 }
10368 
10369 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
10370 /// operands N0 and N1. This is a helper for PerformADDCombine that is
10371 /// called with the default operands, and if that fails, with commuted
10372 /// operands.
10373 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
10374  TargetLowering::DAGCombinerInfo &DCI,
10375  const ARMSubtarget *Subtarget){
10376  // Attempt to create vpadd for this add.
10377  if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
10378  return Result;
10379 
10380  // Attempt to create vpaddl for this add.
10381  if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
10382  return Result;
10383  if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
10384  Subtarget))
10385  return Result;
10386 
10387  // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
10388  if (N0.getNode()->hasOneUse())
10389  if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
10390  return Result;
10391  return SDValue();
10392 }
10393 
10394 bool
10395 ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
10396  CombineLevel Level) const {
10397  if (Level == BeforeLegalizeTypes)
10398  return true;
10399 
10400  if (Subtarget->isThumb() && Subtarget->isThumb1Only())
10401  return true;
10402 
10403  if (N->getOpcode() != ISD::SHL)
10404  return true;
10405 
10406  // Turn off commute-with-shift transform after legalization, so it doesn't
10407  // conflict with PerformSHLSimplify. (We could try to detect when
10408  // PerformSHLSimplify would trigger more precisely, but it isn't
10409  // really necessary.)
10410  return false;
10411 }
10412 
10413 bool
10414 ARMTargetLowering::shouldFoldConstantShiftPairToMask(const SDNode *N,
10415  CombineLevel Level) const {
10416  if (!Subtarget->isThumb1Only())
10417  return true;
10418 
10419  if (Level == BeforeLegalizeTypes)
10420  return true;
10421 
10422  return false;
10423 }
10424 
10425 static SDValue PerformSHLSimplify(SDNode *N,
10426  TargetLowering::DAGCombinerInfo &DCI,
10427  const ARMSubtarget *ST) {
10428  // Allow the generic combiner to identify potential bswaps.
10429  if (DCI.isBeforeLegalize())
10430  return SDValue();
10431 
10432  // DAG combiner will fold:
10433  // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10434  // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10435  // Other code patterns that can also be modified have the following form:
10436  // b + ((a << 1) | 510)
10437  // b + ((a << 1) & 510)
10438  // b + ((a << 1) ^ 510)
10439  // b + ((a << 1) + 510)
10440 
10441  // Many instructions can perform the shift for free, but it requires both
10442  // the operands to be registers. If c1 << c2 is too large, a mov immediate
10443  // instruction will be needed. So, unfold back to the original pattern if:
10444  // - c1 and c2 are small enough that they don't require mov imms, and
10445  // - the user(s) of the node can perform an shl
10446 
10447  // No shifted operands for 16-bit instructions.
10448  if (ST->isThumb() && ST->isThumb1Only())
10449  return SDValue();
10450 
10451  // Check that all the users could perform the shl themselves.
10452  for (auto U : N->uses()) {
10453  switch(U->getOpcode()) {
10454  default:
10455  return SDValue();
10456  case ISD::SUB:
10457  case ISD::ADD:
10458  case ISD::AND:
10459  case ISD::OR:
10460  case ISD::XOR:
10461  case ISD::SETCC:
10462  case ARMISD::CMP:
10463  // Check that the user isn't already using a constant because there
10464  // aren't any instructions that support an immediate operand and a
10465  // shifted operand.
10466  if (isa<ConstantSDNode>(U->getOperand(0)) ||
10467  isa<ConstantSDNode>(U->getOperand(1)))
10468  return SDValue();
10469 
10470  // Check that it's not already using a shift.
10471  if (U->getOperand(0).getOpcode() == ISD::SHL ||
10472  U->getOperand(1).getOpcode() == ISD::SHL)
10473  return SDValue();
10474  break;
10475  }
10476  }
10477 
10478  if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
10479  N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
10480  return SDValue();
10481 
10482  if (N->getOperand(0).getOpcode() != ISD::SHL)
10483  return SDValue();
10484 
10485  SDValue SHL = N->getOperand(0);
10486 
10487  auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
10488  auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
10489  if (!C1ShlC2 || !C2)
10490  return SDValue();
10491 
10492  APInt C2Int = C2->getAPIntValue();
10493  APInt C1Int = C1ShlC2->getAPIntValue();
10494 
10495  // Check that performing a lshr will not lose any information.
10496  APInt Mask = APInt::getHighBitsSet(C2Int.getBitWidth(),
10497  C2Int.getBitWidth() - C2->getZExtValue());
10498  if ((C1Int & Mask) != C1Int)
10499  return SDValue();
10500 
10501  // Shift the first constant.
10502  C1Int.lshrInPlace(C2Int);
10503 
10504  // The immediates are encoded as an 8-bit value that can be rotated.
10505  auto LargeImm = [](const APInt &Imm) {
10506  unsigned Zeros = Imm.countLeadingZeros() + Imm.countTrailingZeros();
10507  return Imm.getBitWidth() - Zeros > 8;
10508  };
10509 
10510  if (LargeImm(C1Int) || LargeImm(C2Int))
10511  return SDValue();
10512 
10513  SelectionDAG &DAG = DCI.DAG;
10514  SDLoc dl(N);
10515  SDValue X = SHL.getOperand(0);
10516  SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
10517  DAG.getConstant(C1Int, dl, MVT::i32));
10518  // Shift left to compensate for the lshr of C1Int.
10519  SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
10520 
10521  LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
10522  SHL.dump(); N->dump());
10523  LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
10524  return Res;
10525 }
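// A worked instance of the unfold (constants chosen so both immediates pass
// the 8-bit rotated-immediate test above):
//   (add (shl %x, 1), 510)  -->  (shl (add %x, 255), 1)
// 510 == 255 << 1; after the unfold the add uses the plain constant 255 and
// the shl can be folded into the user as a shifted operand.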
10526 
10527 
10528 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
10529 ///
10530 static SDValue PerformADDCombine(SDNode *N,
10531  TargetLowering::DAGCombinerInfo &DCI,
10532  const ARMSubtarget *Subtarget) {
10533  SDValue N0 = N->getOperand(0);
10534  SDValue N1 = N->getOperand(1);
10535 
10536  // Only works one way, because it needs an immediate operand.
10537  if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
10538  return Result;
10539 
10540  // First try with the default operand order.
10541  if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
10542  return Result;
10543 
10544  // If that didn't work, try again with the operands commuted.
10545  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
10546 }
10547 
10548 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
10549 ///
10550 static SDValue PerformSUBCombine(SDNode *N,
10551  TargetLowering::DAGCombinerInfo &DCI) {
10552  SDValue N0 = N->getOperand(0);
10553  SDValue N1 = N->getOperand(1);
10554 
10555  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
10556  if (N1.getNode()->hasOneUse())
10557  if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
10558  return Result;
10559 
10560  return SDValue();
10561 }
10562 
10563 /// PerformVMULCombine
10564 /// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
10565 /// special multiplier accumulator forwarding.
10566 /// vmul d3, d0, d2
10567 /// vmla d3, d1, d2
10568 /// is faster than
10569 /// vadd d3, d0, d1
10570 /// vmul d3, d3, d2
10571 // However, for (A + B) * (A + B),
10572 // vadd d2, d0, d1
10573 // vmul d3, d0, d2
10574 // vmla d3, d1, d2
10575 // is slower than
10576 // vadd d2, d0, d1
10577 // vmul d3, d2, d2
10578 static SDValue PerformVMULCombine(SDNode *N,
10579  TargetLowering::DAGCombinerInfo &DCI,
10580  const ARMSubtarget *Subtarget) {
10581  if (!Subtarget->hasVMLxForwarding())
10582  return SDValue();
10583 
10584  SelectionDAG &DAG = DCI.DAG;
10585  SDValue N0 = N->getOperand(0);
10586  SDValue N1 = N->getOperand(1);
10587  unsigned Opcode = N0.getOpcode();
10588  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
10589  Opcode != ISD::FADD && Opcode != ISD::FSUB) {
10590  Opcode = N1.getOpcode();
10591  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
10592  Opcode != ISD::FADD && Opcode != ISD::FSUB)
10593  return SDValue();
10594  std::swap(N0, N1);
10595  }
10596 
10597  if (N0 == N1)
10598  return SDValue();
10599 
10600  EVT VT = N->getValueType(0);
10601  SDLoc DL(N);
10602  SDValue N00 = N0->getOperand(0);
10603  SDValue N01 = N0->getOperand(1);
10604  return DAG.getNode(Opcode, DL, VT,
10605  DAG.getNode(ISD::MUL, DL, VT, N00, N1),
10606  DAG.getNode(ISD::MUL, DL, VT, N01, N1));
10607 }
10608 
10609 static SDValue PerformMULCombine(SDNode *N,
10610  TargetLowering::DAGCombinerInfo &DCI,
10611  const ARMSubtarget *Subtarget) {
10612  SelectionDAG &DAG = DCI.DAG;
10613 
10614  if (Subtarget->isThumb1Only())
10615  return SDValue();
10616 
10617  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
10618  return SDValue();
10619 
10620  EVT VT = N->getValueType(0);
10621  if (VT.is64BitVector() || VT.is128BitVector())
10622  return PerformVMULCombine(N, DCI, Subtarget);
10623  if (VT != MVT::i32)
10624  return SDValue();
10625 
10626  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
10627  if (!C)
10628  return SDValue();
10629 
10630  int64_t MulAmt = C->getSExtValue();
10631  unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);
10632 
10633  ShiftAmt = ShiftAmt & (32 - 1);
10634  SDValue V = N->getOperand(0);
10635  SDLoc DL(N);
10636 
10637  SDValue Res;
10638  MulAmt >>= ShiftAmt;
10639 
10640  if (MulAmt >= 0) {
10641  if (isPowerOf2_32(MulAmt - 1)) {
10642  // (mul x, 2^N + 1) => (add (shl x, N), x)
10643  Res = DAG.getNode(ISD::ADD, DL, VT,
10644  V,
10645  DAG.getNode(ISD::SHL, DL, VT,
10646  V,
10647  DAG.getConstant(Log2_32(MulAmt - 1), DL,
10648  MVT::i32)));
10649  } else if (isPowerOf2_32(MulAmt + 1)) {
10650  // (mul x, 2^N - 1) => (sub (shl x, N), x)
10651  Res = DAG.getNode(ISD::SUB, DL, VT,
10652  DAG.getNode(ISD::SHL, DL, VT,
10653  V,
10654  DAG.getConstant(Log2_32(MulAmt + 1), DL,
10655  MVT::i32)),
10656  V);
10657  } else
10658  return SDValue();
10659  } else {
10660  uint64_t MulAmtAbs = -MulAmt;
10661  if (isPowerOf2_32(MulAmtAbs + 1)) {
10662  // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
10663  Res = DAG.getNode(ISD::SUB, DL, VT,
10664  V,
10665  DAG.getNode(ISD::SHL, DL, VT,
10666  V,
10667  DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
10668  MVT::i32)));
10669  } else if (isPowerOf2_32(MulAmtAbs - 1)) {
10670  // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
10671  Res = DAG.getNode(ISD::ADD, DL, VT,
10672  V,
10673  DAG.getNode(ISD::SHL, DL, VT,
10674  V,
10675  DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
10676  MVT::i32)));
10677  Res = DAG.getNode(ISD::SUB, DL, VT,
10678  DAG.getConstant(0, DL, MVT::i32), Res);
10679  } else
10680  return SDValue();
10681  }
10682 
10683  if (ShiftAmt != 0)
10684  Res = DAG.getNode(ISD::SHL, DL, VT,
10685  Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
10686 
10687  // Do not add new nodes to DAG combiner worklist.
10688  DCI.CombineTo(N, Res, false);
10689  return SDValue();
10690 }
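// Worked instances of the strength reduction above (illustrative):
//   (mul %x, 9)   -->  (add %x, (shl %x, 3))           ; 9  == 2^3 + 1
//   (mul %x, 7)   -->  (sub (shl %x, 3), %x)           ; 7  == 2^3 - 1
//   (mul %x, 20)  -->  (shl (add %x, (shl %x, 2)), 2)  ; 20 == 5 << 2
// On ARM each add/sub maps to a single instruction with a shifted operand,
// e.g. 'add r0, r0, r0, lsl #3'.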
10691 
10692 static SDValue CombineANDShift(SDNode *N,
10693  TargetLowering::DAGCombinerInfo &DCI,
10694  const ARMSubtarget *Subtarget) {
10695  // Allow DAGCombine to pattern-match before we touch the canonical form.
10696  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
10697  return SDValue();
10698 
10699  if (N->getValueType(0) != MVT::i32)
10700  return SDValue();
10701 
10702  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
10703  if (!N1C)
10704  return SDValue();
10705 
10706  uint32_t C1 = (uint32_t)N1C->getZExtValue();
10707  // Don't transform uxtb/uxth.
10708  if (C1 == 255 || C1 == 65535)
10709  return SDValue();
10710 
10711  SDNode *N0 = N->getOperand(0).getNode();
10712  if (!N0->hasOneUse())
10713  return SDValue();
10714 
10715  if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
10716  return SDValue();
10717 
10718  bool LeftShift = N0->getOpcode() == ISD::SHL;
10719 
10720  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
10721  if (!N01C)
10722  return SDValue();
10723 
10724  uint32_t C2 = (uint32_t)N01C->getZExtValue();
10725  if (!C2 || C2 >= 32)
10726  return SDValue();
10727 
10728  // Clear irrelevant bits in the mask.
10729  if (LeftShift)
10730  C1 &= (-1U << C2);
10731  else
10732  C1 &= (-1U >> C2);
10733 
10734  SelectionDAG &DAG = DCI.DAG;
10735  SDLoc DL(N);
10736 
10737  // We have a pattern of the form "(and (shl x, c2) c1)" or
10738  // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
10739  // transform to a pair of shifts, to save materializing c1.
10740 
10741  // First pattern: right shift, then mask off leading bits.
10742  // FIXME: Use demanded bits?
10743  if (!LeftShift && isMask_32(C1)) {
10744  uint32_t C3 = countLeadingZeros(C1);
10745  if (C2 < C3) {
10746  SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
10747  DAG.getConstant(C3 - C2, DL, MVT::i32));
10748  return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
10749  DAG.getConstant(C3, DL, MVT::i32));
10750  }
10751  }
10752 
10753  // First pattern, reversed: left shift, then mask off trailing bits.
10754  if (LeftShift && isMask_32(~C1)) {
10755  uint32_t C3 = countTrailingZeros(C1);
10756  if (C2 < C3) {
10757  SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
10758  DAG.getConstant(C3 - C2, DL, MVT::i32));
10759  return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
10760  DAG.getConstant(C3, DL, MVT::i32));
10761  }
10762  }
10763 
10764  // Second pattern: left shift, then mask off leading bits.
10765  // FIXME: Use demanded bits?
10766  if (LeftShift && isShiftedMask_32(C1)) {
10767  uint32_t Trailing = countTrailingZeros(C1);
10768  uint32_t C3 = countLeadingZeros(C1);
10769  if (Trailing == C2 && C2 + C3 < 32) {
10770  SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
10771  DAG.getConstant(C2 + C3, DL, MVT::i32));
10772  return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
10773  DAG.getConstant(C3, DL, MVT::i32));
10774  }
10775  }
10776 
10777  // Second pattern, reversed: right shift, then mask off trailing bits.
10778  // FIXME: Handle other patterns of known/demanded bits.
10779  if (!LeftShift && isShiftedMask_32(C1)) {
10780  uint32_t Leading = countLeadingZeros(C1);
10781  uint32_t C3 = countTrailingZeros(C1);
10782  if (Leading == C2 && C2 + C3 < 32) {
10783  SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
10784  DAG.getConstant(C2 + C3, DL, MVT::i32));
10785  return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
10786  DAG.getConstant(C3, DL, MVT::i32));
10787  }
10788  }
10789 
10790  // FIXME: Transform "(and (shl x, c2) c1)" ->
10791  // "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
10792  // c1.
10793  return SDValue();
10794 }
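// A concrete instance of the first pattern above (hedged; Thumb1 only):
//   (and (srl %x, 4), 0xfff)  -->  (srl (shl %x, 16), 20)
// c1 = 0xfff has C3 = 20 leading zeros and c2 = 4 < C3, so two shifts
// replace the shift-plus-mask and save materializing the 0xfff constant.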
10795 
10796 static SDValue PerformANDCombine(SDNode *N,
10797  TargetLowering::DAGCombinerInfo &DCI,
10798  const ARMSubtarget *Subtarget) {
10799  // Attempt to use immediate-form VBIC
10800  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
10801  SDLoc dl(N);
10802  EVT VT = N->getValueType(0);
10803  SelectionDAG &DAG = DCI.DAG;
10804 
10805  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
10806  return SDValue();
10807 
10808  APInt SplatBits, SplatUndef;
10809  unsigned SplatBitSize;
10810  bool HasAnyUndefs;
10811  if (BVN &&
10812  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
10813  if (SplatBitSize <= 64) {
10814  EVT VbicVT;
10815  SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
10816  SplatUndef.getZExtValue(), SplatBitSize,
10817  DAG, dl, VbicVT, VT.is128BitVector(),
10818  OtherModImm);
10819  if (Val.getNode()) {
10820  SDValue Input =
10821  DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
10822  SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
10823  return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
10824  }
10825  }
10826  }
10827 
10828  if (!Subtarget->isThumb1Only()) {
10829  // fold (and (select cc, -1, c), x) -> (select cc, x, (and x, c))
10830  if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
10831  return Result;
10832 
10833  if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
10834  return Result;
10835  }
10836 
10837  if (Subtarget->isThumb1Only())
10838  if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
10839  return Result;
10840 
10841  return SDValue();
10842 }
10843 
10844 // Try combining OR nodes to SMULWB, SMULWT.
10845 static SDValue PerformORCombineToSMULWBT(SDNode *OR,
10846  TargetLowering::DAGCombinerInfo &DCI,
10847  const ARMSubtarget *Subtarget) {
10848  if (!Subtarget->hasV6Ops() ||
10849  (Subtarget->isThumb() &&
10850  (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
10851  return SDValue();
10852 
10853  SDValue SRL = OR->getOperand(0);
10854  SDValue SHL = OR->getOperand(1);
10855 
10856  if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
10857  SRL = OR->getOperand(1);
10858  SHL = OR->getOperand(0);
10859  }
10860  if (!isSRL16(SRL) || !isSHL16(SHL))
10861  return SDValue();
10862 
10863  // The first operands to the shifts need to be the two results from the
10864  // same smul_lohi node.
10865  if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
10866  SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
10867  return SDValue();
10868 
10869  SDNode *SMULLOHI = SRL.getOperand(0).getNode();
10870  if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
10871  SHL.getOperand(0) != SDValue(SMULLOHI, 1))
10872  return SDValue();
10873 
10874  // Now we have:
10875  // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))
10876  // For SMULW[B|T], smul_lohi will take a 32-bit and a 16-bit argument.
10877  // For SMULWB the 16-bit value will be sign extended somehow.
10878  // For SMULWT only the SRA is required.
10879  // Check both sides of SMUL_LOHI
10880  SDValue OpS16 = SMULLOHI->getOperand(0);
10881  SDValue OpS32 = SMULLOHI->getOperand(1);
10882 
10883  SelectionDAG &DAG = DCI.DAG;
10884  if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
10885  OpS16 = OpS32;
10886  OpS32 = SMULLOHI->getOperand(0);
10887  }
10888 
10889  SDLoc dl(OR);
10890  unsigned Opcode = 0;
10891  if (isS16(OpS16, DAG))
10892  Opcode = ARMISD::SMULWB;
10893  else if (isSRA16(OpS16)) {
10894  Opcode = ARMISD::SMULWT;
10895  OpS16 = OpS16->getOperand(0);
10896  }
10897  else
10898  return SDValue();
10899 
10900  SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
10901  DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
10902  return SDValue(OR, 0);
10903 }
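// A hedged sketch of a source pattern this combine targets (assumes ARMv6+
// with DSP; names are illustrative):
//   int mulw(int a, short b) {
//     return (int)(((long long)a * b) >> 16);
//   }
// The smul_lohi halves recombined by the srl/shl-by-16 pair above fold into
// a single 'smulwb' (or 'smulwt' when the 16-bit half comes from an asr #16).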
10904 
10905 static SDValue PerformORCombineToBFI(SDNode *N,
10906  TargetLowering::DAGCombinerInfo &DCI,
10907  const ARMSubtarget *Subtarget) {
10908  // BFI is only available on V6T2+
10909  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
10910  return SDValue();
10911 
10912  EVT VT = N->getValueType(0);
10913  SDValue N0 = N->getOperand(0);
10914  SDValue N1 = N->getOperand(1);
10915  SelectionDAG &DAG = DCI.DAG;
10916  SDLoc DL(N);
10917  // 1) or (and A, mask), val => ARMbfi A, val, mask
10918  // iff (val & ~mask) == val
10919  //
10920  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
10921  // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
10922  // && mask == ~mask2
10923  // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
10924  // && ~mask == mask2
10925  // (i.e., copy a bitfield value into another bitfield of the same width)
10926 
10927  if (VT != MVT::i32)
10928  return SDValue();
10929 
10930  SDValue N00 = N0.getOperand(0);
10931 
10932  // The value and the mask need to be constants so we can verify this is
10933  // actually a bitfield set. If the mask is 0xffff, we can do better
10934  // via a movt instruction, so don't use BFI in that case.
10935  SDValue MaskOp = N0.getOperand(1);
10936  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
10937  if (!MaskC)
10938  return SDValue();
10939  unsigned Mask = MaskC->getZExtValue();
10940  if (Mask == 0xffff)
10941  return SDValue();
10942  SDValue Res;
10943  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
10944  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
10945  if (N1C) {
10946  unsigned Val = N1C->getZExtValue();
10947  if ((Val & ~Mask) != Val)
10948  return SDValue();
10949 
10950  if (ARM::isBitFieldInvertedMask(Mask)) {
10951  Val >>= countTrailingZeros(~Mask);
10952 
10953  Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
10954  DAG.getConstant(Val, DL, MVT::i32),
10955  DAG.getConstant(Mask, DL, MVT::i32));
10956 
10957  DCI.CombineTo(N, Res, false);
10958  // Return value from the original node to inform the combiner that N is
10959  // now dead.
10960  return SDValue(N, 0);
10961  }
10962  } else if (N1.getOpcode() == ISD::AND) {
10963  // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
10964  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
10965  if (!N11C)
10966  return SDValue();
10967  unsigned Mask2 = N11C->getZExtValue();
10968 
10969  // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
10970  // to match as-is.
10971  if (ARM::isBitFieldInvertedMask(Mask) &&
10972  (Mask == ~Mask2)) {
10973  // The pack halfword instruction works better for masks that fit it,
10974  // so use that when it's available.
10975  if (Subtarget->hasDSP() &&
10976  (Mask == 0xffff || Mask == 0xffff0000))
10977  return SDValue();
10978  // 2a
10979  unsigned amt = countTrailingZeros(Mask2);
10980  Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
10981  DAG.getConstant(amt, DL, MVT::i32));
10982  Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
10983  DAG.getConstant(Mask, DL, MVT::i32));
10984  DCI.CombineTo(N, Res, false);
10985  // Return value from the original node to inform the combiner that N is
10986  // now dead.
10987  return SDValue(N, 0);
10988  } else if (ARM::isBitFieldInvertedMask(~Mask) &&
10989  (~Mask == Mask2)) {
10990  // The pack halfword instruction works better for masks that fit it,
10991  // so use that when it's available.
10992  if (Subtarget->hasDSP() &&
10993  (Mask2 == 0xffff || Mask2 == 0xffff0000))
10994  return SDValue();
10995  // 2b
10996  unsigned lsb = countTrailingZeros(Mask);
10997  Res = DAG.getNode(ISD::SRL, DL, VT, N00,
10998  DAG.getConstant(lsb, DL, MVT::i32));
10999  Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
11000  DAG.getConstant(Mask2, DL, MVT::i32));
11001  DCI.CombineTo(N, Res, false);
11002  // Return value from the original node to inform the combiner that N is
11003  // now dead.
11004  return SDValue(N, 0);
11005  }
11006  }
11007 
11008  if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
11009  N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
11010  ARM::isBitFieldInvertedMask(~Mask)) {
11011  // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
11012  // where lsb(mask) == #shamt and masked bits of B are known zero.
11013  SDValue ShAmt = N00.getOperand(1);
11014  unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
11015  unsigned LSB = countTrailingZeros(Mask);
11016  if (ShAmtC != LSB)
11017  return SDValue();
11018 
11019  Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
11020  DAG.getConstant(~Mask, DL, MVT::i32));
11021 
11022  DCI.CombineTo(N, Res, false);
11023  // Return value from the original node to inform the combiner that N is
11024  // now dead.
11025  return SDValue(N, 0);
11026  }
11027 
11028  return SDValue();
11029 }
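// Illustrative example of case (1), with made-up constants: for C source such
// as
//   struct S { unsigned lo : 8; unsigned hi : 24; } s;
//   s.lo = 0x2a;
// the DAG contains (or (and s, 0xffffff00), 0x2a); since 0x2a lies entirely in
// ~mask and isBitFieldInvertedMask(0xffffff00) holds, this is rewritten to
// (ARMbfi s, 0x2a, 0xffffff00), i.e. a single BFI instead of an AND/ORR pair.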
11030 
11031 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
11032 static SDValue PerformORCombine(SDNode *N,
11033                                 TargetLowering::DAGCombinerInfo &DCI,
11034                                 const ARMSubtarget *Subtarget) {
11035  // Attempt to use immediate-form VORR
11036  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
11037  SDLoc dl(N);
11038  EVT VT = N->getValueType(0);
11039  SelectionDAG &DAG = DCI.DAG;
11040 
11041  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
11042  return SDValue();
11043 
11044  APInt SplatBits, SplatUndef;
11045  unsigned SplatBitSize;
11046  bool HasAnyUndefs;
11047  if (BVN && Subtarget->hasNEON() &&
11048  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
11049  if (SplatBitSize <= 64) {
11050  EVT VorrVT;
11051  SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
11052  SplatUndef.getZExtValue(), SplatBitSize,
11053  DAG, dl, VorrVT, VT.is128BitVector(),
11054  OtherModImm);
11055  if (Val.getNode()) {
11056  SDValue Input =
11057  DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
11058  SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
11059  return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
11060  }
11061  }
11062  }
11063 
11064  if (!Subtarget->isThumb1Only()) {
11065  // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
11066  if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
11067  return Result;
11068  if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
11069  return Result;
11070  }
11071 
11072  SDValue N0 = N->getOperand(0);
11073  SDValue N1 = N->getOperand(1);
11074 
11075  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
11076  if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
11077  DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
11078 
11079  // The code below optimizes (or (and X, Y), Z).
11080  // The AND operand needs to have a single user to make these optimizations
11081  // profitable.
11082  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
11083  return SDValue();
11084 
11085  APInt SplatUndef;
11086  unsigned SplatBitSize;
11087  bool HasAnyUndefs;
11088 
11089  APInt SplatBits0, SplatBits1;
11090  BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
11091  BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
11092  // Ensure that the second operands of both ANDs are constants.
11093  if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
11094  HasAnyUndefs) && !HasAnyUndefs) {
11095  if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
11096  HasAnyUndefs) && !HasAnyUndefs) {
11097  // Ensure that the bit width of the constants are the same and that
11098  // the splat arguments are logical inverses as per the pattern we
11099  // are trying to simplify.
11100  if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
11101  SplatBits0 == ~SplatBits1) {
11102  // Canonicalize the vector type to make instruction selection
11103  // simpler.
11104  EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
11105  SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
11106  N0->getOperand(1),
11107  N0->getOperand(0),
11108  N1->getOperand(0));
11109  return DAG.getNode(ISD::BITCAST, dl, VT, Result);
11110  }
11111  }
11112  }
11113  }
11114 
11115  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
11116  // reasonable.
11117  if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
11118  if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
11119  return Res;
11120  }
11121 
11122  if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
11123  return Result;
11124 
11125  return SDValue();
11126 }
11127 
11128 static SDValue PerformXORCombine(SDNode *N,
11129                                  TargetLowering::DAGCombinerInfo &DCI,
11130                                  const ARMSubtarget *Subtarget) {
11131  EVT VT = N->getValueType(0);
11132  SelectionDAG &DAG = DCI.DAG;
11133 
11134  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
11135  return SDValue();
11136 
11137  if (!Subtarget->isThumb1Only()) {
11138  // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
11139  if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
11140  return Result;
11141 
11142  if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
11143  return Result;
11144  }
11145 
11146  return SDValue();
11147 }
11148 
11149 // ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
11150 // and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
11151 // their position in "to" (Rd).
11152 static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
11153  assert(N->getOpcode() == ARMISD::BFI);
11154 
11155  SDValue From = N->getOperand(1);
11156  ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
11157  FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());
11158 
11159  // If the Base came from a SHR #C, we can deduce that it is really testing bit
11160  // #C in the base of the SHR.
11161  if (From->getOpcode() == ISD::SRL &&
11162  isa<ConstantSDNode>(From->getOperand(1))) {
11163  APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
11164  assert(Shift.getLimitedValue() < 32 && "Shift too large!");
11165  FromMask <<= Shift.getLimitedValue(31);
11166  From = From->getOperand(0);
11167  }
11168 
11169  return From;
11170 }
11171 
11172 // If A and B contain one contiguous set of bits, does A | B == A . B,
11173 // i.e., do the two runs of bits concatenate into one contiguous run?
11174 // Neither A nor B may be zero.
11175 static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
11176  unsigned LastActiveBitInA = A.countTrailingZeros();
11177  unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
11178  return LastActiveBitInA - 1 == FirstActiveBitInB;
11179 }
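// Worked example (illustrative values): with A = 0b1100 and B = 0b0011,
// LastActiveBitInA is 2 (A's lowest set bit) and FirstActiveBitInB is 1
// (B's highest set bit), so 2 - 1 == 1 holds and A | B = 0b1111 is one
// contiguous run. For A = 0b1100 and B = 0b0001 the runs do not abut
// (2 - 1 != 0), so the function returns false.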
11180 
11181 static SDValue FindBFIToCombineWith(SDNode *N) {
11182  // We have a BFI in N. Follow a possible chain of BFIs and find a BFI it can combine with,
11183  // if one exists.
11184  APInt ToMask, FromMask;
11185  SDValue From = ParseBFI(N, ToMask, FromMask);
11186  SDValue To = N->getOperand(0);
11187 
11188  // Now check for a compatible BFI to merge with. We can pass through BFIs that
11189  // aren't compatible, but not if they set the same bit in their destination as
11190  // we do (or that of any BFI we're going to combine with).
11191  SDValue V = To;
11192  APInt CombinedToMask = ToMask;
11193  while (V.getOpcode() == ARMISD::BFI) {
11194  APInt NewToMask, NewFromMask;
11195  SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
11196  if (NewFrom != From) {
11197  // This BFI has a different base. Keep going.
11198  CombinedToMask |= NewToMask;
11199  V = V.getOperand(0);
11200  continue;
11201  }
11202 
11203  // Do the written bits conflict with any we've seen so far?
11204  if ((NewToMask & CombinedToMask).getBoolValue())
11205  // Conflicting bits - bail out because going further is unsafe.
11206  return SDValue();
11207 
11208  // Are the new bits contiguous when combined with the old bits?
11209  if (BitsProperlyConcatenate(ToMask, NewToMask) &&
11210  BitsProperlyConcatenate(FromMask, NewFromMask))
11211  return V;
11212  if (BitsProperlyConcatenate(NewToMask, ToMask) &&
11213  BitsProperlyConcatenate(NewFromMask, FromMask))
11214  return V;
11215 
11216  // We've seen a write to some bits, so track it.
11217  CombinedToMask |= NewToMask;
11218  // Keep going...
11219  V = V.getOperand(0);
11220  }
11221 
11222  return SDValue();
11223 }
11224 
11225 static SDValue PerformBFICombine(SDNode *N,
11226                                  TargetLowering::DAGCombinerInfo &DCI) {
11227  SDValue N1 = N->getOperand(1);
11228  if (N1.getOpcode() == ISD::AND) {
11229  // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
11230  // the bits being cleared by the AND are not demanded by the BFI.
11231  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
11232  if (!N11C)
11233  return SDValue();
11234  unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
11235  unsigned LSB = countTrailingZeros(~InvMask);
11236  unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
11237  assert(Width <
11238  static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
11239  "undefined behavior");
11240  unsigned Mask = (1u << Width) - 1;
11241  unsigned Mask2 = N11C->getZExtValue();
11242  if ((Mask & (~Mask2)) == 0)
11243  return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
11244  N->getOperand(0), N1.getOperand(0),
11245  N->getOperand(2));
11246  } else if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
11247  // We have a BFI of a BFI. Walk up the BFI chain to see how long it goes.
11248  // Keep track of any consecutive bits set that all come from the same base
11249  // value. We can combine these together into a single BFI.
11250  SDValue CombineBFI = FindBFIToCombineWith(N);
11251  if (CombineBFI == SDValue())
11252  return SDValue();
11253 
11254  // We've found a BFI.
11255  APInt ToMask1, FromMask1;
11256  SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
11257 
11258  APInt ToMask2, FromMask2;
11259  SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
11260  assert(From1 == From2);
11261  (void)From2;
11262 
11263  // First, unlink CombineBFI.
11264  DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0));
11265  // Then create a new BFI, combining the two together.
11266  APInt NewFromMask = FromMask1 | FromMask2;
11267  APInt NewToMask = ToMask1 | ToMask2;
11268 
11269  EVT VT = N->getValueType(0);
11270  SDLoc dl(N);
11271 
11272  if (NewFromMask[0] == 0)
11273  From1 = DCI.DAG.getNode(
11274  ISD::SRL, dl, VT, From1,
11275  DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT));
11276  return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1,
11277  DCI.DAG.getConstant(~NewToMask, dl, VT));
11278  }
11279  return SDValue();
11280 }
11281 
11282 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
11283 /// ARMISD::VMOVRRD.
11284 static SDValue PerformVMOVRRDCombine(SDNode *N,
11285                                      TargetLowering::DAGCombinerInfo &DCI,
11286                                      const ARMSubtarget *Subtarget) {
11287  // vmovrrd(vmovdrr x, y) -> x,y
11288  SDValue InDouble = N->getOperand(0);
11289  if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
11290  return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
11291 
11292  // vmovrrd(load f64) -> (load i32), (load i32)
11293  SDNode *InNode = InDouble.getNode();
11294  if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
11295  InNode->getValueType(0) == MVT::f64 &&
11296  InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
11297  !cast<LoadSDNode>(InNode)->isVolatile()) {
11298  // TODO: Should this be done for non-FrameIndex operands?
11299  LoadSDNode *LD = cast<LoadSDNode>(InNode);
11300 
11301  SelectionDAG &DAG = DCI.DAG;
11302  SDLoc DL(LD);
11303  SDValue BasePtr = LD->getBasePtr();
11304  SDValue NewLD1 =
11305  DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
11306  LD->getAlignment(), LD->getMemOperand()->getFlags());
11307 
11308  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
11309  DAG.getConstant(4, DL, MVT::i32));
11310  SDValue NewLD2 = DAG.getLoad(
11311  MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(),
11312  std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags());
11313 
11314  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
11315  if (DCI.DAG.getDataLayout().isBigEndian())
11316  std::swap (NewLD1, NewLD2);
11317  SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
11318  return Result;
11319  }
11320 
11321  return SDValue();
11322 }
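// Rough machine-level effect of the load split above (illustrative; register
// and offset choices are arbitrary):
//   vldr d16, [sp, #8]            ldr r0, [sp, #8]
//   vmov r0, r1, d16       ==>    ldr r1, [sp, #12]
// avoiding a round trip through a VFP register for a value only needed in
// GPRs.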
11323 
11324 /// PerformVMOVDRRCombine - Target-specific dag combine xforms for
11325 /// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
11326 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
11327  // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
11328  SDValue Op0 = N->getOperand(0);
11329  SDValue Op1 = N->getOperand(1);
11330  if (Op0.getOpcode() == ISD::BITCAST)
11331  Op0 = Op0.getOperand(0);
11332  if (Op1.getOpcode() == ISD::BITCAST)
11333  Op1 = Op1.getOperand(0);
11334  if (Op0.getOpcode() == ARMISD::VMOVRRD &&
11335  Op0.getNode() == Op1.getNode() &&
11336  Op0.getResNo() == 0 && Op1.getResNo() == 1)
11337  return DAG.getNode(ISD::BITCAST, SDLoc(N),
11338  N->getValueType(0), Op0.getOperand(0));
11339  return SDValue();
11340 }
11341 
11342 /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
11343 /// are normal, non-volatile loads. If so, it is profitable to bitcast an
11344 /// i64 vector to have f64 elements, since the value can then be loaded
11345 /// directly into a VFP register.
11346 static bool hasNormalLoadOperand(SDNode *N) {
11347  unsigned NumElts = N->getValueType(0).getVectorNumElements();
11348  for (unsigned i = 0; i < NumElts; ++i) {
11349  SDNode *Elt = N->getOperand(i).getNode();
11350  if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
11351  return true;
11352  }
11353  return false;
11354 }
11355 
11356 /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
11357 /// ISD::BUILD_VECTOR.
11358 static SDValue PerformBUILD_VECTORCombine(SDNode *N,
11359                                           TargetLowering::DAGCombinerInfo &DCI,
11360                                           const ARMSubtarget *Subtarget) {
11361  // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
11362  // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
11363  // into a pair of GPRs, which is fine when the value is used as a scalar,
11364  // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
11365  SelectionDAG &DAG = DCI.DAG;
11366  if (N->getNumOperands() == 2)
11367  if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
11368  return RV;
11369 
11370  // Load i64 elements as f64 values so that type legalization does not split
11371  // them up into i32 values.
11372  EVT VT = N->getValueType(0);
11373  if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
11374  return SDValue();
11375  SDLoc dl(N);
11376  SmallVector<SDValue, 8> Ops;
11377  unsigned NumElts = VT.getVectorNumElements();
11378  for (unsigned i = 0; i < NumElts; ++i) {
11379  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
11380  Ops.push_back(V);
11381  // Make the DAGCombiner fold the bitcast.
11382  DCI.AddToWorklist(V.getNode());
11383  }
11384  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
11385  SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
11386  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
11387 }
11388 
11389 /// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
11390 static SDValue
11391 PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
11392  // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
11393  // At that time, we may have inserted bitcasts from integer to float.
11394  // If these bitcasts have survived DAGCombine, change the lowering of this
11395  // BUILD_VECTOR in something more vector friendly, i.e., that does not
11396  // force to use floating point types.
11397 
11398  // Make sure we can change the type of the vector.
11399  // This is possible iff:
11400  // 1. The vector is only used in a bitcast to a integer type. I.e.,
11401  // 1.1. Vector is used only once.
11402  // 1.2. Use is a bit convert to an integer type.
11403  // 2. The size of its operands are 32-bits (64-bits are not legal).
11404  EVT VT = N->getValueType(0);
11405  EVT EltVT = VT.getVectorElementType();
11406 
11407  // Check 1.1. and 2.
11408  if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
11409  return SDValue();
11410 
11411  // By construction, the input type must be float.
11412  assert(EltVT == MVT::f32 && "Unexpected type!");
11413 
11414  // Check 1.2.
11415  SDNode *Use = *N->use_begin();
11416  if (Use->getOpcode() != ISD::BITCAST ||
11417  Use->getValueType(0).isFloatingPoint())
11418  return SDValue();
11419 
11420  // Check profitability.
11421  // Model is, if more than half of the relevant operands are bitcast from
11422  // i32, turn the build_vector into a sequence of insert_vector_elt.
11423  // Relevant operands are everything that is not statically
11424  // (i.e., at compile time) bitcasted.
11425  unsigned NumOfBitCastedElts = 0;
11426  unsigned NumElts = VT.getVectorNumElements();
11427  unsigned NumOfRelevantElts = NumElts;
11428  for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
11429  SDValue Elt = N->getOperand(Idx);
11430  if (Elt->getOpcode() == ISD::BITCAST) {
11431  // Assume only bit cast to i32 will go away.
11432  if (Elt->getOperand(0).getValueType() == MVT::i32)
11433  ++NumOfBitCastedElts;
11434  } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
11435  // Constants are statically casted, thus do not count them as
11436  // relevant operands.
11437  --NumOfRelevantElts;
11438  }
11439 
11440  // Check if more than half of the elements require a non-free bitcast.
11441  if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
11442  return SDValue();
11443 
11444  SelectionDAG &DAG = DCI.DAG;
11445  // Create the new vector type.
11446  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
11447  // Check if the type is legal.
11448  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11449  if (!TLI.isTypeLegal(VecVT))
11450  return SDValue();
11451 
11452  // Combine:
11453  // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
11454  // => BITCAST INSERT_VECTOR_ELT
11455  // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
11456  // (BITCAST EN), N.
11457  SDValue Vec = DAG.getUNDEF(VecVT);
11458  SDLoc dl(N);
11459  for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
11460  SDValue V = N->getOperand(Idx);
11461  if (V.isUndef())
11462  continue;
11463  if (V.getOpcode() == ISD::BITCAST &&
11464  V->getOperand(0).getValueType() == MVT::i32)
11465  // Fold obvious case.
11466  V = V.getOperand(0);
11467  else {
11468  V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
11469  // Make the DAGCombiner fold the bitcasts.
11470  DCI.AddToWorklist(V.getNode());
11471  }
11472  SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
11473  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
11474  }
11475  Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
11476  // Make the DAGCombiner fold the bitcasts.
11477  DCI.AddToWorklist(Vec.getNode());
11478  return Vec;
11479 }
11480 
11481 /// PerformInsertEltCombine - Target-specific dag combine xforms for
11482 /// ISD::INSERT_VECTOR_ELT.
11483 static SDValue PerformInsertEltCombine(SDNode *N,
11484                                        TargetLowering::DAGCombinerInfo &DCI) {
11485  // Bitcast an i64 load inserted into a vector to f64.
11486  // Otherwise, the i64 value will be legalized to a pair of i32 values.
11487  EVT VT = N->getValueType(0);
11488  SDNode *Elt = N->getOperand(1).getNode();
11489  if (VT.getVectorElementType() != MVT::i64 ||
11490  !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
11491  return SDValue();
11492 
11493  SelectionDAG &DAG = DCI.DAG;
11494  SDLoc dl(N);
11495  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
11496  VT.getVectorNumElements());
11497  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
11498  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
11499  // Make the DAGCombiner fold the bitcasts.
11500  DCI.AddToWorklist(Vec.getNode());
11501  DCI.AddToWorklist(V.getNode());
11502  SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
11503  Vec, V, N->getOperand(2));
11504  return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
11505 }
11506 
11507 /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
11508 /// ISD::VECTOR_SHUFFLE.
11509 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
11510  // The LLVM shufflevector instruction does not require the shuffle mask
11511  // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
11512  // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
11513  // operands do not match the mask length, they are extended by concatenating
11514  // them with undef vectors. That is probably the right thing for other
11515  // targets, but for NEON it is better to concatenate two double-register
11516  // size vector operands into a single quad-register size vector. Do that
11517  // transformation here:
11518  // shuffle(concat(v1, undef), concat(v2, undef)) ->
11519  // shuffle(concat(v1, v2), undef)
11520  SDValue Op0 = N->getOperand(0);
11521  SDValue Op1 = N->getOperand(1);
11522  if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
11523  Op1.getOpcode() != ISD::CONCAT_VECTORS ||
11524  Op0.getNumOperands() != 2 ||
11525  Op1.getNumOperands() != 2)
11526  return SDValue();
11527  SDValue Concat0Op1 = Op0.getOperand(1);
11528  SDValue Concat1Op1 = Op1.getOperand(1);
11529  if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
11530  return SDValue();
11531  // Skip the transformation if any of the types are illegal.
11532  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11533  EVT VT = N->getValueType(0);
11534  if (!TLI.isTypeLegal(VT) ||
11535  !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
11536  !TLI.isTypeLegal(Concat1Op1.getValueType()))
11537  return SDValue();
11538 
11539  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
11540  Op0.getOperand(0), Op1.getOperand(0));
11541  // Translate the shuffle mask.
11542  SmallVector<int, 16> NewMask;
11543  unsigned NumElts = VT.getVectorNumElements();
11544  unsigned HalfElts = NumElts/2;
11545  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
11546  for (unsigned n = 0; n < NumElts; ++n) {
11547  int MaskElt = SVN->getMaskElt(n);
11548  int NewElt = -1;
11549  if (MaskElt < (int)HalfElts)
11550  NewElt = MaskElt;
11551  else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
11552  NewElt = HalfElts + MaskElt - NumElts;
11553  NewMask.push_back(NewElt);
11554  }
11555  return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
11556  DAG.getUNDEF(VT), NewMask);
11557 }
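// Mask translation example (illustrative): for a v4i16 result (NumElts = 4,
// HalfElts = 2) whose operands are concat(v1, undef) and concat(v2, undef),
// a mask of <0, 1, 4, 5> selecting the defined halves of both inputs becomes
// <0, 1, 2, 3> on the single concatenated vector concat(v1, v2).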
11558 
11559 /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
11560 /// NEON load/store intrinsics, and generic vector load/stores, to merge
11561 /// base address updates.
11562 /// For generic load/stores, the memory type is assumed to be a vector.
11563 /// The caller is assumed to have checked legality.
11564 static SDValue CombineBaseUpdate(SDNode *N,
11565                                  TargetLowering::DAGCombinerInfo &DCI) {
11566  SelectionDAG &DAG = DCI.DAG;
11567  const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
11568                            N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
11569  const bool isStore = N->getOpcode() == ISD::STORE;
11570  const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
11571  SDValue Addr = N->getOperand(AddrOpIdx);
11572  MemSDNode *MemN = cast<MemSDNode>(N);
11573  SDLoc dl(N);
11574 
11575  // Search for a use of the address operand that is an increment.
11576  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
11577  UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
11578  SDNode *User = *UI;
11579  if (User->getOpcode() != ISD::ADD ||
11580  UI.getUse().getResNo() != Addr.getResNo())
11581  continue;
11582 
11583  // Check that the add is independent of the load/store. Otherwise, folding
11584  // it would create a cycle. We can avoid searching through Addr as it's a
11585  // predecessor to both.
11586  SmallPtrSet<const SDNode *, 32> Visited;
11587  SmallVector<const SDNode *, 16> Worklist;
11588  Visited.insert(Addr.getNode());
11589  Worklist.push_back(N);
11590  Worklist.push_back(User);
11591  if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
11592  SDNode::hasPredecessorHelper(User, Visited, Worklist))
11593  continue;
11594 
11595  // Find the new opcode for the updating load/store.
11596  bool isLoadOp = true;
11597  bool isLaneOp = false;
11598  unsigned NewOpc = 0;
11599  unsigned NumVecs = 0;
11600  if (isIntrinsic) {
11601  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
11602  switch (IntNo) {
11603  default: llvm_unreachable("unexpected intrinsic for Neon base update");
11604  case Intrinsic::arm_neon_vld1:     NewOpc = ARMISD::VLD1_UPD;
11605  NumVecs = 1; break;
11606  case Intrinsic::arm_neon_vld2:     NewOpc = ARMISD::VLD2_UPD;
11607  NumVecs = 2; break;
11608  case Intrinsic::arm_neon_vld3:     NewOpc = ARMISD::VLD3_UPD;
11609  NumVecs = 3; break;
11610  case Intrinsic::arm_neon_vld4:     NewOpc = ARMISD::VLD4_UPD;
11611  NumVecs = 4; break;
11612  case Intrinsic::arm_neon_vld2dup:
11613  case Intrinsic::arm_neon_vld3dup:
11614  case Intrinsic::arm_neon_vld4dup:
11615  // TODO: Support updating VLDxDUP nodes. For now, we just skip
11616  // combining base updates for such intrinsics.
11617  continue;
11618  case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
11619  NumVecs = 2; isLaneOp = true; break;
11620  case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
11621  NumVecs = 3; isLaneOp = true; break;
11622  case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
11623  NumVecs = 4; isLaneOp = true; break;
11624  case Intrinsic::arm_neon_vst1:     NewOpc = ARMISD::VST1_UPD;
11625  NumVecs = 1; isLoadOp = false; break;
11626  case Intrinsic::arm_neon_vst2:     NewOpc = ARMISD::VST2_UPD;
11627  NumVecs = 2; isLoadOp = false; break;
11628  case Intrinsic::arm_neon_vst3:     NewOpc = ARMISD::VST3_UPD;
11629  NumVecs = 3; isLoadOp = false; break;
11630  case Intrinsic::arm_neon_vst4:     NewOpc = ARMISD::VST4_UPD;
11631  NumVecs = 4; isLoadOp = false; break;
11632  case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
11633  NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
11634  case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
11635  NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
11636  case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
11637  NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
11638  }
11639  } else {
11640  isLaneOp = true;
11641  switch (N->getOpcode()) {
11642  default: llvm_unreachable("unexpected opcode for Neon base update");
11643  case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break;
11644  case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
11645  case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
11646  case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
11647  case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;
11648  NumVecs = 1; isLaneOp = false; break;
11649  case ISD::STORE: NewOpc = ARMISD::VST1_UPD;
11650  NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
11651  }
11652  }
11653 
11654  // Find the size of memory referenced by the load/store.
11655  EVT VecTy;
11656  if (isLoadOp) {
11657  VecTy = N->getValueType(0);
11658  } else if (isIntrinsic) {
11659  VecTy = N->getOperand(AddrOpIdx+1).getValueType();
11660  } else {
11661  assert(isStore && "Node has to be a load, a store, or an intrinsic!");
11662  VecTy = N->getOperand(1).getValueType();
11663  }
11664 
11665  unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
11666  if (isLaneOp)
11667  NumBytes /= VecTy.getVectorNumElements();
11668 
11669  // If the increment is a constant, it must match the memory ref size.
11670  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
11671  ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
11672  if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) {
11673  // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
11674  // separate instructions that make it harder to use a non-constant update.
11675  continue;
11676  }
11677 
11678  // OK, we found an ADD we can fold into the base update.
11679  // Now, create a _UPD node, taking care of not breaking alignment.
11680 
11681  EVT AlignedVecTy = VecTy;
11682  unsigned Alignment = MemN->getAlignment();
11683 
11684  // If this is a less-than-standard-aligned load/store, change the type to
11685  // match the standard alignment.
11686  // The alignment is overlooked when selecting _UPD variants; and it's
11687  // easier to introduce bitcasts here than fix that.
11688  // There are 3 ways to get to this base-update combine:
11689  // - intrinsics: they are assumed to be properly aligned (to the standard
11690  // alignment of the memory type), so we don't need to do anything.
11691  // - ARMISD::VLDx nodes: they are only generated from the aforementioned
11692  // intrinsics, so, likewise, there's nothing to do.
11693  // - generic load/store instructions: the alignment is specified as an
11694  // explicit operand, rather than implicitly as the standard alignment
11695  // memory type (like the intrinsics). We need to change the
11696  // memory type to match the explicit alignment. That way, we don't
11697  // generate non-standard-aligned ARMISD::VLDx nodes.
11698  if (isa<LSBaseSDNode>(N)) {
11699  if (Alignment == 0)
11700  Alignment = 1;
11701  if (Alignment < VecTy.getScalarSizeInBits() / 8) {
11702  MVT EltTy = MVT::getIntegerVT(Alignment * 8);
11703  assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
11704  assert(!isLaneOp && "Unexpected generic load/store lane.");
11705  unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
11706  AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
11707  }
11708  // Don't set an explicit alignment on regular load/stores that we want
11709  // to transform to VLD/VST 1_UPD nodes.
11710  // This matches the behavior of regular load/stores, which only get an
11711  // explicit alignment if the MMO alignment is larger than the standard
11712  // alignment of the memory type.
11713  // Intrinsics, however, always get an explicit alignment, set to the
11714  // alignment of the MMO.
11715  Alignment = 1;
11716  }
11717 
11718  // Create the new updating load/store node.
11719  // First, create an SDVTList for the new updating node's results.
11720  EVT Tys[6];
11721  unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
11722  unsigned n;
11723  for (n = 0; n < NumResultVecs; ++n)
11724  Tys[n] = AlignedVecTy;
11725  Tys[n++] = MVT::i32;
11726  Tys[n] = MVT::Other;
11727  SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
11728 
11729  // Then, gather the new node's operands.
11730  SmallVector<SDValue, 8> Ops;
11731  Ops.push_back(N->getOperand(0)); // incoming chain
11732  Ops.push_back(N->getOperand(AddrOpIdx));
11733  Ops.push_back(Inc);
11734 
11735  if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
11736  // Try to match the intrinsic's signature
11737  Ops.push_back(StN->getValue());
11738  } else {
11739  // Loads (and of course intrinsics) match the intrinsics' signature,
11740  // so just add all but the alignment operand.
11741  for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i)
11742  Ops.push_back(N->getOperand(i));
11743  }
11744 
11745  // For all node types, the alignment operand is always the last one.
11746  Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));
11747 
11748  // If this is a non-standard-aligned STORE, the penultimate operand is the
11749  // stored value. Bitcast it to the aligned type.
11750  if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
11751  SDValue &StVal = Ops[Ops.size()-2];
11752  StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
11753  }
11754 
11755  EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
11756  SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
11757  MemN->getMemOperand());
11758 
11759  // Update the uses.
11760  SmallVector<SDValue, 5> NewResults;
11761  for (unsigned i = 0; i < NumResultVecs; ++i)
11762  NewResults.push_back(SDValue(UpdN.getNode(), i));
11763 
11764  // If this is a non-standard-aligned LOAD, the first result is the loaded
11765  // value. Bitcast it to the expected result type.
11766  if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
11767  SDValue &LdVal = NewResults[0];
11768  LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
11769  }
11770 
11771  NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
11772  DCI.CombineTo(N, NewResults);
11773  DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
11774 
11775  break;
11776  }
11777  return SDValue();
11778 }
11779 
11780 static SDValue PerformVLDCombine(SDNode *N,
11781                                  TargetLowering::DAGCombinerInfo &DCI) {
11782  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
11783  return SDValue();
11784 
11785  return CombineBaseUpdate(N, DCI);
11786 }
11787 
11788 /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
11789 /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
11790 /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
11791 /// return true.
11792 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
11793  SelectionDAG &DAG = DCI.DAG;
11794  EVT VT = N->getValueType(0);
11795  // vldN-dup instructions only support 64-bit vectors for N > 1.
11796  if (!VT.is64BitVector())
11797  return false;
11798 
11799  // Check if the VDUPLANE operand is a vldN-dup intrinsic.
11800  SDNode *VLD = N->getOperand(0).getNode();
11801  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
11802  return false;
11803  unsigned NumVecs = 0;
11804  unsigned NewOpc = 0;
11805  unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
11806  if (IntNo == Intrinsic::arm_neon_vld2lane) {
11807  NumVecs = 2;
11808  NewOpc = ARMISD::VLD2DUP;
11809  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
11810  NumVecs = 3;
11811  NewOpc = ARMISD::VLD3DUP;
11812  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
11813  NumVecs = 4;
11814  NewOpc = ARMISD::VLD4DUP;
11815  } else {
11816  return false;
11817  }
11818 
11819  // First check that all the vldN-lane uses are VDUPLANEs and that the lane
11820  // numbers match the load.
11821  unsigned VLDLaneNo =
11822  cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
11823  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
11824  UI != UE; ++UI) {
11825  // Ignore uses of the chain result.
11826  if (UI.getUse().getResNo() == NumVecs)
11827  continue;
11828  SDNode *User = *UI;
11829  if (User->getOpcode() != ARMISD::VDUPLANE ||
11830  VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
11831  return false;
11832  }
11833 
11834  // Create the vldN-dup node.
11835  EVT Tys[5];
11836  unsigned n;
11837  for (n = 0; n < NumVecs; ++n)
11838  Tys[n] = VT;
11839  Tys[n] = MVT::Other;
11840  SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1));
11841  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
11842  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
11843  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
11844  Ops, VLDMemInt->getMemoryVT(),
11845  VLDMemInt->getMemOperand());
11846 
11847  // Update the uses.
11848  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
11849  UI != UE; ++UI) {
11850  unsigned ResNo = UI.getUse().getResNo();
11851  // Ignore uses of the chain result.
11852  if (ResNo == NumVecs)
11853  continue;
11854  SDNode *User = *UI;
11855  DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
11856  }
11857 
11858  // Now the vldN-lane intrinsic is dead except for its chain result.
11859  // Update uses of the chain.
11860  std::vector<SDValue> VLDDupResults;
11861  for (unsigned n = 0; n < NumVecs; ++n)
11862  VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
11863  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
11864  DCI.CombineTo(VLD, VLDDupResults);
11865 
11866  return true;
11867 }
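// Illustrative effect: if every lane result of a vld2lane is immediately
// VDUPLANE'd on the same lane, e.g.
//   vld2.32 {d16[1], d17[1]}, [r0]  followed by  vdup.32 of lane 1 of each,
// the whole group is replaced by a single all-lanes load:
//   vld2.32 {d16[], d17[]}, [r0]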
11868 
11869 /// PerformVDUPLANECombine - Target-specific dag combine xforms for
11870 /// ARMISD::VDUPLANE.
11871 static SDValue PerformVDUPLANECombine(SDNode *N,
11872                                       TargetLowering::DAGCombinerInfo &DCI) {
11873  SDValue Op = N->getOperand(0);
11874 
11875  // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
11876  // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
11877  if (CombineVLDDUP(N, DCI))
11878  return SDValue(N, 0);
11879 
11880  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
11881  // redundant. Ignore bit_converts for now; element sizes are checked below.
11882  while (Op.getOpcode() == ISD::BITCAST)
11883  Op = Op.getOperand(0);
11884  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
11885  return SDValue();
11886 
11887  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
11888  unsigned EltSize = Op.getScalarValueSizeInBits();
11889  // The canonical VMOV for a zero vector uses a 32-bit element size.
11890  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
11891  unsigned EltBits;
11892  if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
11893  EltSize = 8;
11894  EVT VT = N->getValueType(0);
11895  if (EltSize > VT.getScalarSizeInBits())
11896  return SDValue();
11897 
11898  return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
11899 }
11900 
11901 /// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
11902 static SDValue PerformVDUPCombine(SDNode *N,
11903                                   TargetLowering::DAGCombinerInfo &DCI) {
11904  SelectionDAG &DAG = DCI.DAG;
11905  SDValue Op = N->getOperand(0);
11906 
11907  // Match VDUP(LOAD) -> VLD1DUP.
11908  // We match this pattern here rather than waiting for isel because the
11909  // transform is only legal for unindexed loads.
11910  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
11911  if (LD && Op.hasOneUse() && LD->isUnindexed() &&
11912  LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
11913  SDValue Ops[] = { LD->getOperand(0), LD->getOperand(1),
11914  DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32) };
11915  SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
11916  SDValue VLDDup = DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys,
11917  Ops, LD->getMemoryVT(),
11918  LD->getMemOperand());
11919  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
11920  return VLDDup;
11921  }
11922 
11923  return SDValue();
11924 }
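// Illustrative effect (registers arbitrary): a vdup.32 q8, r1 fed only by
// "ldr r1, [r0]" becomes a single all-lanes load, roughly
//   vld1.32 {d16[], d17[]}, [r0]
// provided the load is unindexed and has no other users.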
11925 
11926 static SDValue PerformLOADCombine(SDNode *N,
11927                                   TargetLowering::DAGCombinerInfo &DCI) {
11928  EVT VT = N->getValueType(0);
11929 
11930  // If this is a legal vector load, try to combine it into a VLD1_UPD.
11931  if (ISD::isNormalLoad(N) && VT.isVector() &&
11932  DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
11933  return CombineBaseUpdate(N, DCI);
11934 
11935  return SDValue();
11936 }
11937 
11938 /// PerformSTORECombine - Target-specific dag combine xforms for
11939 /// ISD::STORE.
11940 static SDValue PerformSTORECombine(SDNode *N,
11941                                    TargetLowering::DAGCombinerInfo &DCI) {
11942  StoreSDNode *St = cast<StoreSDNode>(N);
11943  if (St->isVolatile())
11944  return SDValue();
11945 
11946  // Optimize trunc store (of multiple scalars) to shuffle and store. First,
11947  // pack all of the elements in one place. Next, store to memory in fewer
11948  // chunks.
11949  SDValue StVal = St->getValue();
11950  EVT VT = StVal.getValueType();
11951  if (St->isTruncatingStore() && VT.isVector()) {
11952  SelectionDAG &DAG = DCI.DAG;
11953  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11954  EVT StVT = St->getMemoryVT();
11955  unsigned NumElems = VT.getVectorNumElements();
11956  assert(StVT != VT && "Cannot truncate to the same type");
11957  unsigned FromEltSz = VT.getScalarSizeInBits();
11958  unsigned ToEltSz = StVT.getScalarSizeInBits();
11959 
11960  // From, To sizes and ElemCount must be pow of two
11961  if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
11962 
11963  // We are going to use the original vector elt for storing.
11964  // Accumulated smaller vector elements must be a multiple of the store size.
11965  if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
11966 
11967  unsigned SizeRatio = FromEltSz / ToEltSz;
11968  assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
11969 
11970  // Create a type on which we perform the shuffle.
11971  EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
11972  NumElems*SizeRatio);
11973  assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
11974 
11975  SDLoc DL(St);
11976  SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
11977  SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
11978  for (unsigned i = 0; i < NumElems; ++i)
11979  ShuffleVec[i] = DAG.getDataLayout().isBigEndian()
11980  ? (i + 1) * SizeRatio - 1
11981  : i * SizeRatio;
11982 
11983  // Can't shuffle using an illegal type.
11984  if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
11985 
11986  SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
11987  DAG.getUNDEF(WideVec.getValueType()),
11988  ShuffleVec);
11989  // At this point all of the data is stored at the bottom of the
11990  // register. We now need to save it to mem.
11991 
11992  // Find the largest store unit
11993  MVT StoreType = MVT::i8;
11994  for (MVT Tp : MVT::integer_valuetypes()) {
11995  if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
11996  StoreType = Tp;
11997  }
11998  // Didn't find a legal store type.
11999  if (!TLI.isTypeLegal(StoreType))
12000  return SDValue();
12001 
12002  // Bitcast the original vector into a vector of store-size units
12003  EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
12004  StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
12005  assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
12006  SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
12007  SmallVector<SDValue, 8> Chains;
12008  SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
12009  TLI.getPointerTy(DAG.getDataLayout()));
12010  SDValue BasePtr = St->getBasePtr();
12011 
12012  // Perform one or more big stores into memory.
12013  unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
12014  for (unsigned I = 0; I < E; I++) {
12015  SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
12016  StoreType, ShuffWide,
12017  DAG.getIntPtrConstant(I, DL));
12018  SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
12019  St->getPointerInfo(), St->getAlignment(),
12020  St->getMemOperand()->getFlags());
12021  BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
12022  Increment);
12023  Chains.push_back(Ch);
12024  }
12025  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
12026  }
12027 
12028  if (!ISD::isNormalStore(St))
12029  return SDValue();
12030 
12031  // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
12032  // ARM stores of arguments in the same cache line.
12033  if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
12034  StVal.getNode()->hasOneUse()) {
12035  SelectionDAG &DAG = DCI.DAG;
12036  bool isBigEndian = DAG.getDataLayout().isBigEndian();
12037  SDLoc DL(St);
12038  SDValue BasePtr = St->getBasePtr();
12039  SDValue NewST1 = DAG.getStore(
12040  St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
12041  BasePtr, St->getPointerInfo(), St->getAlignment(),
12042  St->getMemOperand()->getFlags());
12043 
12044  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
12045  DAG.getConstant(4, DL, MVT::i32));
12046  return DAG.getStore(NewST1.getValue(0), DL,
12047  StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
12048  OffsetPtr, St->getPointerInfo(),
12049  std::min(4U, St->getAlignment() / 2),
12050  St->getMemOperand()->getFlags());
12051  }
12052 
12053  if (StVal.getValueType() == MVT::i64 &&
12054  StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
12055 
12056  // Bitcast an i64 store extracted from a vector to f64.
12057  // Otherwise, the i64 value will be legalized to a pair of i32 values.
12058  SelectionDAG &DAG = DCI.DAG;
12059  SDLoc dl(StVal);
12060  SDValue IntVec = StVal.getOperand(0);
12061  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
12062  IntVec.getValueType().getVectorNumElements());
12063  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
12064  SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
12065  Vec, StVal.getOperand(1));
12066  dl = SDLoc(N);
12067  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
12068  // Make the DAGCombiner fold the bitcasts.
12069  DCI.AddToWorklist(Vec.getNode());
12070  DCI.AddToWorklist(ExtElt.getNode());
12071  DCI.AddToWorklist(V.getNode());
12072  return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
12073  St->getPointerInfo(), St->getAlignment(),
12074  St->getMemOperand()->getFlags(), St->getAAInfo());
12075  }
12076 
12077  // If this is a legal vector store, try to combine it into a VST1_UPD.
12078  if (ISD::isNormalStore(N) && VT.isVector() &&
12079  DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
12080  return CombineBaseUpdate(N, DCI);
12081 
12082  return SDValue();
12083 }
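// Illustrative effect of the VMOVDRR split above (registers arbitrary):
// rather than
//   vmov d16, r0, r1 ; vstr d16, [r2]
// the store is emitted as two integer stores,
//   str r0, [r2] ; str r1, [r2, #4]
// with the operand order swapped on big-endian targets.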
12084 
12085 /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
12086 /// can replace combinations of VMUL and VCVT (floating-point to integer)
12087 /// when the VMUL has a constant operand that is a power of 2.
12088 ///
12089 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
12090 /// vmul.f32 d16, d17, d16
12091 /// vcvt.s32.f32 d16, d16
12092 /// becomes:
12093 /// vcvt.s32.f32 d16, d16, #3
12094 static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
12095                                   const ARMSubtarget *Subtarget) {
12096  if (!Subtarget->hasNEON())
12097  return SDValue();
12098 
12099  SDValue Op = N->getOperand(0);
12100  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
12101  Op.getOpcode() != ISD::FMUL)
12102  return SDValue();
12103 
12104  SDValue ConstVec = Op->getOperand(1);
12105  if (!isa<BuildVectorSDNode>(ConstVec))
12106  return SDValue();
12107 
12108  MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
12109  uint32_t FloatBits = FloatTy.getSizeInBits();
12110  MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
12111  uint32_t IntBits = IntTy.getSizeInBits();
12112  unsigned NumLanes = Op.getValueType().getVectorNumElements();
12113  if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
12114  // These instructions only exist converting from f32 to i32. We can handle
12115  // smaller integers by generating an extra truncate, but larger ones would
12116  // be lossy. We also can't handle more than 4 lanes, since these instructions
12117  // only support v2i32/v4i32 types.
12118  return SDValue();
12119  }
12120 
12121  BitVector UndefElements;
12122  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
12123  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
12124  if (C == -1 || C == 0 || C > 32)
12125  return SDValue();
12126 
12127  SDLoc dl(N);
12128  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
12129  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
12130  Intrinsic::arm_neon_vcvtfp2fxu;
12131  SDValue FixConv = DAG.getNode(
12132  ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
12133  DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
12134  DAG.getConstant(C, dl, MVT::i32));
12135 
12136  if (IntBits < FloatBits)
12137  FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
12138 
12139  return FixConv;
12140 }
12141 
12142 /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
12143 /// can replace combinations of VCVT (integer to floating-point) and VDIV
12144 /// when the VDIV has a constant operand that is a power of 2.
12145 ///
12146 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
12147 /// vcvt.f32.s32 d16, d16
12148 /// vdiv.f32 d16, d17, d16
12149 /// becomes:
12150 /// vcvt.f32.s32 d16, d16, #3
12151 static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
12152                                   const ARMSubtarget *Subtarget) {
12153  if (!Subtarget->hasNEON())
12154  return SDValue();
12155 
12156  SDValue Op = N->getOperand(0);
12157  unsigned OpOpcode = Op.getNode()->getOpcode();
12158  if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
12159  (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
12160  return SDValue();
12161 
12162  SDValue ConstVec = N->getOperand(1);
12163  if (!isa<BuildVectorSDNode>(ConstVec))
12164  return SDValue();
12165 
12166  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
12167  uint32_t FloatBits = FloatTy.getSizeInBits();
12168  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
12169  uint32_t IntBits = IntTy.getSizeInBits();
12170  unsigned NumLanes = Op.getValueType().getVectorNumElements();
12171  if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
12172  // These instructions only exist converting from i32 to f32. We can handle
12173  // smaller integers by generating an extra extend, but larger ones would
12174  // be lossy. We also can't handle more than 4 lanes, since these instructions
12175  // only support v2i32/v4i32 types.
12176  return SDValue();
12177  }
12178 
12179  BitVector UndefElements;
12180  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
12181  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
12182  if (C == -1 || C == 0 || C > 32)
12183  return SDValue();
12184 
12185  SDLoc dl(N);
12186  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
12187  SDValue ConvInput = Op.getOperand(0);
12188  if (IntBits < FloatBits)
12189  ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
12190  dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
12191  ConvInput);
12192 
12193  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
12194  Intrinsic::arm_neon_vcvtfxu2fp;
12195  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
12196  Op.getValueType(),
12197  DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
12198  ConvInput, DAG.getConstant(C, dl, MVT::i32));
12199 }
12200 
12201 /// getVShiftImm - Check if this is a valid build_vector for the immediate
12202 /// operand of a vector shift operation, where all the elements of the
12203 /// build_vector must have the same constant integer value.
12204 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
12205  // Ignore bit_converts.
12206  while (Op.getOpcode() == ISD::BITCAST)
12207  Op = Op.getOperand(0);
12208  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
12209  APInt SplatBits, SplatUndef;
12210  unsigned SplatBitSize;
12211  bool HasAnyUndefs;
12212  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
12213  HasAnyUndefs, ElementBits) ||
12214  SplatBitSize > ElementBits)
12215  return false;
12216  Cnt = SplatBits.getSExtValue();
12217  return true;
12218 }
12219 
12220 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
12221 /// operand of a vector shift left operation. That value must be in the range:
12222 /// 0 <= Value < ElementBits for a left shift; or
12223 /// 0 <= Value <= ElementBits for a long left shift.
12224 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
12225  assert(VT.isVector() && "vector shift count is not a vector type");
12226  int64_t ElementBits = VT.getScalarSizeInBits();
12227  if (! getVShiftImm(Op, ElementBits, Cnt))
12228  return false;
12229  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
12230 }
12231 
12232 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
12233 /// operand of a vector shift right operation. For a shift opcode, the value
12234 /// is positive, but for an intrinsic the shift count must be negative. The
12235 /// absolute value must be in the range:
12236 /// 1 <= |Value| <= ElementBits for a right shift; or
12237 /// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
12238 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
12239  int64_t &Cnt) {
12240  assert(VT.isVector() && "vector shift count is not a vector type");
12241  int64_t ElementBits = VT.getScalarSizeInBits();
12242  if (! getVShiftImm(Op, ElementBits, Cnt))
12243  return false;
12244  if (!isIntrinsic)
12245  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
12246  if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) {
12247  Cnt = -Cnt;
12248  return true;
12249  }
12250  return false;
12251 }
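// Example of the ranges (illustrative): for v8i8 shifts (ElementBits = 8), a
// left-shift immediate must be in [0,7] ([0,8] for the long form), while a
// right-shift immediate must be in [1,8] ([1,4] for the narrowing form); the
// intrinsics encode right shifts as negative counts, which are negated here.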
12252 
12253 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
12254 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
12255  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
12256  switch (IntNo) {
12257  default:
12258  // Don't do anything for most intrinsics.
12259  break;
12260 
12261  // Vector shifts: check for immediate versions and lower them.
12262  // Note: This is done during DAG combining instead of DAG legalizing because
12263  // the build_vectors for 64-bit vector element shift counts are generally
12264  // not legal, and it is hard to see their values after they get legalized to
12265  // loads from a constant pool.
12266  case Intrinsic::arm_neon_vshifts:
12267  case Intrinsic::arm_neon_vshiftu:
12268  case Intrinsic::arm_neon_vrshifts:
12269  case Intrinsic::arm_neon_vrshiftu:
12270  case Intrinsic::arm_neon_vrshiftn:
12271  case Intrinsic::arm_neon_vqshifts:
12272  case Intrinsic::arm_neon_vqshiftu:
12273  case Intrinsic::arm_neon_vqshiftsu:
12274  case Intrinsic::arm_neon_vqshiftns:
12275  case Intrinsic::arm_neon_vqshiftnu:
12276  case Intrinsic::arm_neon_vqshiftnsu:
12277  case Intrinsic::arm_neon_vqrshiftns:
12278  case Intrinsic::arm_neon_vqrshiftnu:
12279  case Intrinsic::arm_neon_vqrshiftnsu: {
12280  EVT VT = N->getOperand(1).getValueType();
12281  int64_t Cnt;
12282  unsigned VShiftOpc = 0;
12283 
12284  switch (IntNo) {
12285  case Intrinsic::arm_neon_vshifts:
12286  case Intrinsic::arm_neon_vshiftu:
12287  if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
12288  VShiftOpc = ARMISD::VSHL;
12289  break;
12290  }
12291  if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
12292  VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
12293  ARMISD::VSHRs : ARMISD::VSHRu);
12294  break;
12295  }
12296  return SDValue();
12297 
12298  case Intrinsic::arm_neon_vrshifts:
12299  case Intrinsic::arm_neon_vrshiftu:
12300  if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
12301  break;
12302  return SDValue();
12303 
12304  case Intrinsic::arm_neon_vqshifts:
12305  case Intrinsic::arm_neon_vqshiftu:
12306  if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
12307  break;
12308  return SDValue();
12309 
12310  case Intrinsic::arm_neon_vqshiftsu:
12311  if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
12312  break;
12313  llvm_unreachable("invalid shift count for vqshlu intrinsic");
12314 
12315  case Intrinsic::arm_neon_vrshiftn:
12316  case Intrinsic::arm_neon_vqshiftns:
12317  case Intrinsic::arm_neon_vqshiftnu:
12318  case Intrinsic::arm_neon_vqshiftnsu:
12319  case Intrinsic::arm_neon_vqrshiftns:
12320  case Intrinsic::arm_neon_vqrshiftnu:
12321  case Intrinsic::arm_neon_vqrshiftnsu:
12322  // Narrowing shifts require an immediate right shift.
12323  if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
12324  break;
12325  llvm_unreachable("invalid shift count for narrowing vector shift "
12326  "intrinsic");
12327 
12328  default:
12329  llvm_unreachable("unhandled vector shift");
12330  }
12331 
12332  switch (IntNo) {
12333  case Intrinsic::arm_neon_vshifts:
12334  case Intrinsic::arm_neon_vshiftu:
12335  // Opcode already set above.
12336  break;
12337  case Intrinsic::arm_neon_vrshifts:
12338  VShiftOpc = ARMISD::VRSHRs; break;
12339  case Intrinsic::arm_neon_vrshiftu:
12340  VShiftOpc = ARMISD::VRSHRu; break;
12341  case Intrinsic::arm_neon_vrshiftn:
12342  VShiftOpc = ARMISD::VRSHRN; break;
12343  case Intrinsic::arm_neon_vqshifts:
12344  VShiftOpc = ARMISD::VQSHLs; break;
12345  case Intrinsic::arm_neon_vqshiftu:
12346  VShiftOpc = ARMISD::VQSHLu; break;
12347  case Intrinsic::arm_neon_vqshiftsu:
12348  VShiftOpc = ARMISD::VQSHLsu; break;
12349  case Intrinsic::arm_neon_vqshiftns:
12350  VShiftOpc = ARMISD::VQSHRNs; break;
12351  case Intrinsic::arm_neon_vqshiftnu:
12352  VShiftOpc = ARMISD::VQSHRNu; break;
12353  case Intrinsic::arm_neon_vqshiftnsu:
12354  VShiftOpc = ARMISD::VQSHRNsu; break;
12355  case Intrinsic::arm_neon_vqrshiftns:
12356  VShiftOpc = ARMISD::VQRSHRNs; break;
12357  case Intrinsic::arm_neon_vqrshiftnu:
12358  VShiftOpc = ARMISD::VQRSHRNu; break;
12359  case Intrinsic::arm_neon_vqrshiftnsu:
12360  VShiftOpc = ARMISD::VQRSHRNsu; break;
12361  }
12362 
12363  SDLoc dl(N);
12364  return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
12365  N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
12366  }
12367 
12368  case Intrinsic::arm_neon_vshiftins: {
12369  EVT VT = N->getOperand(1).getValueType();
12370  int64_t Cnt;
12371  unsigned VShiftOpc = 0;
12372 
12373  if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
12374  VShiftOpc = ARMISD::VSLI;
12375  else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
12376  VShiftOpc = ARMISD::VSRI;
12377  else {
12378  llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
12379  }
12380 
12381  SDLoc dl(N);
12382  return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
12383  N->getOperand(1), N->getOperand(2),
12384  DAG.getConstant(Cnt, dl, MVT::i32));
12385  }
12386 
12387  case Intrinsic::arm_neon_vqrshifts:
12388  case Intrinsic::arm_neon_vqrshiftu:
12389  // No immediate versions of these to check for.
12390  break;
12391  }
12392 
12393  return SDValue();
12394 }
12395 
12396 /// PerformShiftCombine - Checks for immediate versions of vector shifts and
12397 /// lowers them. As with the vector shift intrinsics, this is done during DAG
12398 /// combining instead of DAG legalizing because the build_vectors for 64-bit
12399 /// vector element shift counts are generally not legal, and it is hard to see
12400 /// their values after they get legalized to loads from a constant pool.
12401 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
12402                                    const ARMSubtarget *ST) {
12403  EVT VT = N->getValueType(0);
12404  if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
12405  // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
12406  // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16.
12407  SDValue N1 = N->getOperand(1);
12408  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
12409  SDValue N0 = N->getOperand(0);
12410  if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
12411  DAG.MaskedValueIsZero(N0.getOperand(0),
12412  APInt::getHighBitsSet(32, 16)))
12413  return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
12414  }
12415  }
12416 
12417  // Nothing to be done for scalar shifts.
12418  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12419  if (!VT.isVector() || !TLI.isTypeLegal(VT))
12420  return SDValue();
12421 
12422  assert(ST->hasNEON() && "unexpected vector shift");
12423  int64_t Cnt;
12424 
12425  switch (N->getOpcode()) {
12426  default: llvm_unreachable("unexpected shift opcode");
12427 
12428  case ISD::SHL:
12429  if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
12430  SDLoc dl(N);
12431  return DAG.getNode(ARMISD::VSHL, dl, VT, N->getOperand(0),
12432  DAG.getConstant(Cnt, dl, MVT::i32));
12433  }
12434  break;
12435 
12436  case ISD::SRA:
12437  case ISD::SRL:
12438  if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
12439  unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
12440  ARMISD::VSHRs : ARMISD::VSHRu);
12441  SDLoc dl(N);
12442  return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
12443  DAG.getConstant(Cnt, dl, MVT::i32));
12444  }
12445  }
12446  return SDValue();
12447 }
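// Illustrative example for the SRL case above: if the top 16 bits of x are
// known zero, (srl (bswap x), 16) and (rotr (bswap x), 16) compute the same
// value, and the rotate form selects to a single REV16 instead of a REV
// followed by LSR #16.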
12448 
12449 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
12450 /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
12451 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
12452                                     const ARMSubtarget *ST) {
12453  SDValue N0 = N->getOperand(0);
12454 
12455  // Check for sign- and zero-extensions of vector extract operations of 8-
12456  // and 16-bit vector elements. NEON supports these directly. They are
12457  // handled during DAG combining because type legalization will promote them
12458  // to 32-bit types and it is messy to recognize the operations after that.
12459  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
12460  SDValue Vec = N0.getOperand(0);
12461  SDValue Lane = N0.getOperand(1);
12462  EVT VT = N->getValueType(0);
12463  EVT EltVT = N0.getValueType();
12464  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12465 
12466  if (VT == MVT::i32 &&
12467  (EltVT == MVT::i8 || EltVT == MVT::i16) &&
12468  TLI.isTypeLegal(Vec.getValueType()) &&
12469  isa<ConstantSDNode>(Lane)) {
12470 
12471  unsigned Opc = 0;
12472  switch (N->getOpcode()) {
12473  default: llvm_unreachable("unexpected opcode");
12474  case ISD::SIGN_EXTEND:
12475  Opc = ARMISD::VGETLANEs;
12476  break;
12477  case ISD::ZERO_EXTEND:
12478  case ISD::ANY_EXTEND:
12479  Opc = ARMISD::VGETLANEu;
12480  break;
12481  }
12482  return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
12483  }
12484  }
12485 
12486  return SDValue();
12487 }
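// Illustrative IR-level example:
//   %e = extractelement <8 x i8> %v, i32 3
//   %s = sext i8 %e to i32
// becomes a single (VGETLANEs %v, 3), i.e. a sign-extending lane move
// (roughly "vmov.s8 r0, d16[3]") rather than an extract plus a separate sxtb.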
12488 
12489 static const APInt *isPowerOf2Constant(SDValue V) {
12490  ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
12491  if (!C)
12492  return nullptr;
12493  const APInt *CV = &C->getAPIntValue();
12494  return CV->isPowerOf2() ? CV : nullptr;
12495 }
12496 
12497 SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
12498  // If we have a CMOV, OR and AND combination such as:
12499  // if (x & CN)
12500  // y |= CM;
12501  //
12502  // And:
12503  // * CN is a single bit;
12504  // * All bits covered by CM are known zero in y
12505  //
12506  // Then we can convert this into a sequence of BFI instructions. This will
12507  // always be a win if CM is a single bit, will always be no worse than the
12508  // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
12509  // three bits (due to the extra IT instruction).
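  // Illustrative instance (assumed values): for "if (x & 0x10) y |= 0x6" with
  // bits 1-2 of y known zero, CN = 0x10 is a single bit, so x is first
  // shifted right by 4 and two BFIs then insert that bit into y at
  // positions 1 and 2.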
12510 
12511  SDValue Op0 = CMOV->getOperand(0);
12512  SDValue Op1 = CMOV->getOperand(1);
12513  auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
12514  auto CC = CCNode->getAPIntValue().getLimitedValue();
12515  SDValue CmpZ = CMOV->getOperand(4);
12516 
12517  // The compare must be against zero.
12518  if (!isNullConstant(CmpZ->getOperand(1)))
12519  return SDValue();
12520 
12521  assert(CmpZ->getOpcode() == ARMISD::CMPZ);
12522  SDValue And = CmpZ->getOperand(0);
12523  if (And->getOpcode() != ISD::AND)
12524  return SDValue();
12525  const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
12526  if (!AndC)
12527  return SDValue();
12528  SDValue X = And->getOperand(0);
12529 
12530  if (CC == ARMCC::EQ) {
12531  // We're performing an "equal to zero" compare. Swap the operands so we
12532  // canonicalize on a "not equal to zero" compare.
12533  std::swap(Op0, Op1);
12534  } else {
12535  assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
12536  }
12537 
12538  if (Op1->getOpcode() != ISD::OR)
12539  return SDValue();
12540 
12541  ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
12542  if (!OrC)
12543  return SDValue();
12544  SDValue Y = Op1->getOperand(0);
12545 
12546  if (Op0 != Y)
12547  return SDValue();
12548 
12549  // Now, is it profitable to continue?
12550  APInt OrCI = OrC->getAPIntValue();
12551  unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
12552  if (OrCI.countPopulation() > Heuristic)
12553  return SDValue();
12554 
12555  // Lastly, can we determine that the bits defined by OrCI
12556  // are zero in Y?
12557  KnownBits Known = DAG.computeKnownBits(Y);
12558  if ((OrCI & Known.Zero) != OrCI)
12559  return SDValue();
12560 
12561  // OK, we can do the combine.
12562  SDValue V = Y;
12563  SDLoc dl(X);
12564  EVT VT = X.getValueType();
12565  unsigned BitInX = AndC->logBase2();
12566 
12567  if (BitInX != 0) {
12568  // We must shift X first.
12569  X = DAG.getNode(ISD::SRL, dl, VT, X,
12570  DAG.getConstant(BitInX, dl, VT));
12571  }
12572 
12573  for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
12574  BitInY < NumActiveBits; ++BitInY) {
12575  if (OrCI[BitInY] == 0)
12576  continue;
12577  APInt Mask(VT.getSizeInBits(), 0);
12578  Mask.setBit(BitInY);
12579  V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
12580  // Confusingly, the operand is an *inverted* mask.
12581  DAG.getConstant(~Mask, dl, VT));
12582  }
12583 
12584  return V;
12585 }
12586 
12587 /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
12588 SDValue
12589 ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
12590  SDValue Cmp = N->getOperand(4);
12591  if (Cmp.getOpcode() != ARMISD::CMPZ)
12592  // Only looking at NE cases.
12593  return SDValue();
12594 
12595  EVT VT = N->getValueType(0);
12596  SDLoc dl(N);
12597  SDValue LHS = Cmp.getOperand(0);
12598  SDValue RHS = Cmp.getOperand(1);
12599  SDValue Chain = N->getOperand(0);
12600  SDValue BB = N->getOperand(1);
12601  SDValue ARMcc = N->getOperand(2);
12602  ARMCC::CondCodes CC =
12603  (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
12604 
12605  // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
12606  // -> (brcond Chain BB CC CPSR Cmp)
12607  if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
12608  LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
12609  LHS->getOperand(0)->hasOneUse()) {
12610  auto *LHS00C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(0));
12611  auto *LHS01C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(1));
12612  auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
12613  auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
12614  if ((LHS00C && LHS00C->getZExtValue() == 0) &&
12615  (LHS01C && LHS01C->getZExtValue() == 1) &&
12616  (LHS1C && LHS1C->getZExtValue() == 1) &&
12617  (RHSC && RHSC->getZExtValue() == 0)) {
12618  return DAG.getNode(
12619  ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
12620  LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
12621  }
12622  }
12623 
12624  return SDValue();
12625 }
12626 
12627 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
12628 SDValue
12629 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
12630  SDValue Cmp = N->getOperand(4);
12631  if (Cmp.getOpcode() != ARMISD::CMPZ)
12632  // Only looking at EQ and NE cases.
12633  return SDValue();
12634 
12635  EVT VT = N->getValueType(0);
12636  SDLoc dl(N);
12637  SDValue LHS = Cmp.getOperand(0);
12638  SDValue RHS = Cmp.getOperand(1);
12639  SDValue FalseVal = N->getOperand(0);
12640  SDValue TrueVal = N->getOperand(1);
12641  SDValue ARMcc = N->getOperand(2);
12642  ARMCC::CondCodes CC =
12643  (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
12644 
12645  // BFI is only available on V6T2+.
12646  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
12647  SDValue R = PerformCMOVToBFICombine(N, DAG);
12648  if (R)
12649  return R;
12650  }
12651 
12652  // Simplify
12653  // mov r1, r0
12654  // cmp r1, x
12655  // mov r0, y
12656  // moveq r0, x
12657  // to
12658  // cmp r0, x
12659  // movne r0, y
12660  //
12661  // mov r1, r0
12662  // cmp r1, x
12663  // mov r0, x
12664  // movne r0, y
12665  // to
12666  // cmp r0, x
12667  // movne r0, y
12668  // FIXME: Turn this into a target-neutral optimization?
12669  SDValue Res;
12670  if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
12671  Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
12672  N->getOperand(3), Cmp);
12673  } else if (CC == ARMCC::EQ && TrueVal == RHS) {
12674  SDValue ARMcc;
12675  SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
12676  Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
12677  N->getOperand(3), NewCmp);
12678  }
12679 
12680  // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
12681  // -> (cmov F T CC CPSR Cmp)
12682  if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) {
12683  auto *LHS0C = dyn_cast<ConstantSDNode>(LHS->getOperand(0));
12684  auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
12685  auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
12686  if ((LHS0C && LHS0C->getZExtValue() == 0) &&
12687  (LHS1C && LHS1C->getZExtValue() == 1) &&
12688  (RHSC && RHSC->getZExtValue() == 0)) {
12689  return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
12690  LHS->getOperand(2), LHS->getOperand(3),
12691  LHS->getOperand(4));
12692  }
12693  }
12694 
12695  if (!VT.isInteger())
12696  return SDValue();
12697 
12698  // Materialize a boolean comparison for integers so we can avoid branching.
12699  if (isNullConstant(FalseVal)) {
12700  if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
12701  if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
12702  // If x == y then x - y == 0, and ARM's CLZ will return 32; shifting
12703  // that right by 5 bits yields 1, while any nonzero difference yields 0.
12704  // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
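  // Illustrative (assumed values): x == y == 7 gives SUB == 0, CTLZ == 32,
  // and 32 >> 5 == 1; any nonzero difference has CTLZ <= 31, yielding 0.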
12705  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12706  Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
12707  DAG.getConstant(5, dl, MVT::i32));
12708  } else {
12709  // CMOV 0, 1, ==, (CMPZ x, y) ->
12710  // (ADDCARRY (SUB x, y), t:0, t:1)
12711  // where t = (SUBCARRY 0, (SUB x, y), 0)
12712  //
12713  // The SUBCARRY computes 0 - (x - y) and this will give a borrow when
12714  // x != y. In other words, a carry C == 1 when x == y, C == 0
12715  // otherwise.
12716  // The final ADDCARRY computes
12717  // x - y + (0 - (x - y)) + C == C
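  // Illustrative check (assumed values): x == y gives Sub == 0 and no
  // borrow, so C == 1 and 0 + 0 + 1 == 1; x != y gives a borrow, so C == 0
  // and Sub + (0 - Sub) + 0 == 0.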
12718  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12719  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
12720  SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
12721  // ISD::SUBCARRY returns a borrow, but what we actually want here
12722  // is the carry.
12723  SDValue Carry =
12724  DAG.getNode(ISD::SUB, dl, MVT::i32,
12725  DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
12726  Res = DAG.getNode(ISD::ADDCARRY, dl, VTs, Sub, Neg, Carry);
12727  }
12728  } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
12729  (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
12730  // This seems pointless but will allow us to combine it further below.
12731  // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
12732  SDValue Sub =
12733  DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
12734  SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
12735  Sub.getValue(1), SDValue());
12736  Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
12737  N->getOperand(3), CPSRGlue.getValue(1));
12738  FalseVal = Sub;
12739  }
12740  } else if (isNullConstant(TrueVal)) {
12741  if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
12742  (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
12743  // This seems pointless but will allow us to combine it further below
12744  // Note that we change == for != as this is the dual for the case above.
12745  // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
12746  SDValue Sub =
12747  DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
12748  SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
12749  Sub.getValue(1), SDValue());
12750  Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
12751  DAG.getConstant(ARMCC::NE, dl, MVT::i32),
12752  N->getOperand(3), CPSRGlue.getValue(1));
12753  FalseVal = Sub;
12754  }
12755  }
12756 
12757  // On Thumb1, the DAG above may be further combined if z is a power of 2
12758  // (z == 2 ^ K).
12759  // CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
12760  // merge t3, t4
12761  // where t1 = (SUBCARRY (SUB x, y), z, 0)
12762  // t2 = (SUBCARRY (SUB x, y), t1:0, t1:1)
12763  // t3 = if K != 0 then (SHL t2:0, K) else t2:0
12764  // t4 = (SUB 1, t2:1) [ we want a carry, not a borrow ]
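  // Illustrative trace (assumed values): z == 4 gives K == 2 and TrueVal is
  // rewritten to 1; for x - y == 5, t1:0 == 4 with no borrow, t2:0 == 1, and
  // the final SHL by 2 restores 4, while x - y == 0 produces 0 as required.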
12765  const APInt *TrueConst;
12766  if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
12767  (FalseVal.getOpcode() == ARMISD::SUBS) &&
12768  (FalseVal.getOperand(0) == LHS) && (FalseVal.getOperand(1) == RHS) &&
12769  (TrueConst = isPowerOf2Constant(TrueVal))) {
12770  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
12771  unsigned ShiftAmount = TrueConst->logBase2();
12772  if (ShiftAmount)
12773  TrueVal = DAG.getConstant(1, dl, VT);
12774  SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
12775  Res = DAG.getNode(ISD::SUBCARRY, dl, VTs, FalseVal, Subc, Subc.getValue(1));
12776  // Make it a carry, not a borrow.
12777  SDValue Carry = DAG.getNode(
12778  ISD::SUB, dl, VT, DAG.getConstant(1, dl, MVT::i32), Res.getValue(1));
12779  Res = DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Res, Carry);
12780 
12781  if (ShiftAmount)
12782  Res = DAG.getNode(ISD::SHL, dl, VT, Res,
12783  DAG.getConstant(ShiftAmount, dl, MVT::i32));
12784  }
12785 
12786  if (Res.getNode()) {
12787  KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
12788  // Capture demanded bits information that would be otherwise lost.
12789  if (Known.Zero == 0xfffffffe)
12790  Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
12791  DAG.getValueType(MVT::i1));
12792  else if (Known.Zero == 0xffffff00)
12793  Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
12794  DAG.getValueType(MVT::i8));
12795  else if (Known.Zero == 0xffff0000)
12796  Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
12797  DAG.getValueType(MVT::i16));
12798  }
12799 
12800  return Res;
12801 }
12802 
12803 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
12804  DAGCombinerInfo &DCI) const {
12805  switch (N->getOpcode()) {
12806  default: break;
12807  case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
12808  case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
12809  case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
12810  case ISD::SUB: return PerformSUBCombine(N, DCI);
12811  case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
12812  case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
12813  case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
12814  case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
12815  case ARMISD::ADDC:
12816  case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
12817  case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
12818  case ARMISD::BFI: return PerformBFICombine(N, DCI);
12819  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
12820  case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
12821  case ISD::STORE: return PerformSTORECombine(N, DCI);
12822  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
12823  case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
12824  case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
12825  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
12826  case ARMISD::VDUP: return PerformVDUPCombine(N, DCI);
12827  case ISD::FP_TO_SINT:
12828  case ISD::FP_TO_UINT:
12829  return PerformVCVTCombine(N, DCI.DAG, Subtarget);
12830  case ISD::FDIV:
12831  return PerformVDIVCombine(N, DCI.DAG, Subtarget);
12832  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
12833  case ISD::SHL:
12834  case ISD::SRA:
12835  case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
12836  case ISD::SIGN_EXTEND:
12837  case ISD::ZERO_EXTEND:
12838  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
12839  case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
12840  case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
12841  case ISD::LOAD: return PerformLOADCombine(N, DCI);
12842  case ARMISD::VLD1DUP:
12843  case ARMISD::VLD2DUP:
12844  case ARMISD::VLD3DUP:
12845  case ARMISD::VLD4DUP:
12846  return PerformVLDCombine(N, DCI);
12847  case ARMISD::BUILD_VECTOR:
12848  return PerformARMBUILD_VECTORCombine(N, DCI);
12849  case ARMISD::SMULWB: {
12850  unsigned BitWidth = N->getValueType(0).getSizeInBits();
12851  APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
12852  if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
12853  return SDValue();
12854  break;
12855  }
12856  case ARMISD::SMULWT: {
12857  unsigned BitWidth = N->getValueType(0).getSizeInBits();
12858  APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
12859  if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
12860  return SDValue();
12861  break;
12862  }
12863  case ARMISD::SMLALBB: {
12864  unsigned BitWidth = N->getValueType(0).getSizeInBits();
12865  APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
12866  if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
12867  (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
12868  return SDValue();
12869  break;
12870  }
12871  case ARMISD::SMLALBT: {
12872  unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
12873  APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
12874  unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
12875  APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
12876  if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
12877  (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
12878  return SDValue();
12879  break;
12880  }
12881  case ARMISD::SMLALTB: {
12882  unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
12883  APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
12884  unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
12885  APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
12886  if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
12887  (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
12888  return SDValue();
12889  break;
12890  }
12891  case ARMISD::SMLALTT: {
12892  unsigned BitWidth = N->getValueType(0).getSizeInBits();
12893  APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
12894  if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
12895  (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
12896  return SDValue();
12897  break;
12898  }
12899  case ISD::INTRINSIC_VOID:
12900  case ISD::INTRINSIC_W_CHAIN:
12901  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12902  case Intrinsic::arm_neon_vld1:
12903  case Intrinsic::arm_neon_vld1x2:
12904  case Intrinsic::arm_neon_vld1x3:
12905  case Intrinsic::arm_neon_vld1x4:
12906  case Intrinsic::arm_neon_vld2:
12907  case Intrinsic::arm_neon_vld3:
12908  case Intrinsic::arm_neon_vld4:
12909  case Intrinsic::arm_neon_vld2dup:
12910  case Intrinsic::arm_neon_vld3dup:
12911  case Intrinsic::arm_neon_vld4dup:
12912  case Intrinsic::arm_neon_vld2lane:
12913  case Intrinsic::arm_neon_vld3lane:
12914  case Intrinsic::arm_neon_vld4lane:
12915  case Intrinsic::arm_neon_vst1:
12916  case Intrinsic::arm_neon_vst1x2:
12917  case Intrinsic::arm_neon_vst1x3:
12918  case Intrinsic::arm_neon_vst1x4:
12919  case Intrinsic::arm_neon_vst2:
12920  case Intrinsic::arm_neon_vst3:
12921  case Intrinsic::arm_neon_vst4:
12922  case Intrinsic::arm_neon_vst2lane:
12923  case Intrinsic::arm_neon_vst3lane:
12924  case Intrinsic::arm_neon_vst4lane:
12925  return PerformVLDCombine(N, DCI);
12926  default: break;
12927  }
12928  break;
12929  }
12930  return SDValue();
12931 }
12932 
12933 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
12934  EVT VT) const {
12935  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
12936 }
12937 
12938 bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
12939  unsigned,
12940  unsigned,
12941  bool *Fast) const {
12942  // Depends what it gets converted into if the type is weird.
12943  if (!VT.isSimple())
12944  return false;
12945 
12946  // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs.
12947  bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
12948 
12949  switch (VT.getSimpleVT().SimpleTy) {
12950  default:
12951  return false;
12952  case MVT::i8:
12953  case MVT::i16:
12954  case MVT::i32: {
12955  // Unaligned access can use (for example) LDRB, LDRH, LDR
12956  if (AllowsUnaligned) {
12957  if (Fast)
12958  *Fast = Subtarget->hasV7Ops();
12959  return true;
12960  }
12961  return false;
12962  }
12963  case MVT::f64:
12964  case MVT::v2f64: {
12965  // For any little-endian targets with NEON, we can support unaligned ld/st
12966  // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
12967  // A big-endian target may also explicitly support unaligned accesses.
12968  if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
12969  if (Fast)
12970  *Fast = true;
12971  return true;
12972  }
12973  return false;
12974  }
12975  }
12976 }
12977 
12978 static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
12979  unsigned AlignCheck) {
12980  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
12981  (DstAlign == 0 || DstAlign % AlignCheck == 0));
12982 }
12983 
12984 EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
12985  unsigned DstAlign, unsigned SrcAlign,
12986  bool IsMemset, bool ZeroMemset,
12987  bool MemcpyStrSrc,
12988  MachineFunction &MF) const {
12989  const Function &F = MF.getFunction();
12990 
12991  // See if we can use NEON instructions for this...
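  // Illustrative (assumed scenario): a 32-byte memcpy with 16-byte-aligned
  // source and destination on a NEON target takes the v2f64 path below and
  // can be emitted as vld1/vst1 pairs rather than word-sized loads/stores.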
12992  if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
12993  !F.hasFnAttribute(Attribute::NoImplicitFloat)) {
12994  bool Fast;
12995  if (Size >= 16 &&
12996  (memOpAlign(SrcAlign, DstAlign, 16) ||
12997  (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) {
12998  return MVT::v2f64;
12999  } else if (Size >= 8 &&
13000  (memOpAlign(SrcAlign, DstAlign, 8) ||
13001  (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) &&
13002  Fast))) {
13003  return MVT::f64;
13004  }
13005  }
13006 
13007  // Let the target-independent logic figure it out.
13008  return MVT::Other;
13009 }
13010 
13011 // 64-bit integers are split into their high and low parts and held in two
13012 // different registers, so the trunc is free since the low register can just
13013 // be used.
13014 bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
13015  if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
13016  return false;
13017  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
13018  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
13019  return (SrcBits == 64 && DestBits == 32);
13020 }
13021 
13022 bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
13023  if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
13024  !DstVT.isInteger())
13025  return false;
13026  unsigned SrcBits = SrcVT.getSizeInBits();
13027  unsigned DestBits = DstVT.getSizeInBits();
13028  return (SrcBits == 64 && DestBits == 32);
13029 }
13030 
13031 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
13032  if (Val.getOpcode() != ISD::LOAD)
13033  return false;
13034 
13035  EVT VT1 = Val.getValueType();
13036  if (!VT1.isSimple() || !VT1.isInteger() ||
13037  !VT2.isSimple() || !VT2.isInteger())
13038  return false;
13039 
13040  switch (VT1.getSimpleVT().SimpleTy) {
13041  default: break;
13042  case MVT::i1:
13043  case MVT::i8:
13044  case MVT::i16:
13045  // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
13046  return true;
13047  }
13048 
13049  return false;
13050 }
13051 
13052 bool ARMTargetLowering::isFNegFree(EVT VT) const {
13053  if (!VT.isSimple())
13054  return false;
13055 
13056  // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
13057  // negate values directly (fneg is free). So, we don't want to let the DAG
13058  // combiner rewrite fneg into xors and some other instructions. For f16 and
13059  // FullFP16 argument passing, some bitcast nodes may be introduced,
13060  // triggering this DAG combine rewrite, so we are avoiding that with this.
13061  switch (VT.getSimpleVT().SimpleTy) {
13062  default: break;
13063  case MVT::f16:
13064  return Subtarget->hasFullFP16();
13065  }
13066 
13067  return false;
13068 }
13069 
13070 bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
13071  EVT VT = ExtVal.getValueType();
13072 
13073  if (!isTypeLegal(VT))
13074  return false;
13075 
13076  // Don't create a loadext if we can fold the extension into a wide/long
13077  // instruction.
13078  // If there's more than one user instruction, the loadext is desirable no
13079  // matter what. There can be two uses by the same instruction.
13080  if (ExtVal->use_empty() ||
13081  !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
13082  return true;
13083 
13084  SDNode *U = *ExtVal->use_begin();
13085  if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
13086  U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL))
13087  return false;
13088 
13089  return true;
13090 }
13091 
13092 bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
13093  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
13094  return false;
13095 
13096  if (!isTypeLegal(EVT::getEVT(Ty1)))
13097  return false;
13098 
13099  assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
13100 
13101  // Assuming the caller doesn't have a zeroext or signext return parameter,
13102  // truncation all the way down to i1 is valid.
13103  return true;
13104 }
13105 
13106 int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
13107  const AddrMode &AM, Type *Ty,
13108  unsigned AS) const {
13109  if (isLegalAddressingMode(DL, AM, Ty, AS)) {
13110  if (Subtarget->hasFPAO())
13111  return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster
13112  return 0;
13113  }
13114  return -1;
13115 }
13116 
13117 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
13118  if (V < 0)
13119  return false;
13120 
13121  unsigned Scale = 1;
13122  switch (VT.getSimpleVT().SimpleTy) {
13123  default: return false;
13124  case MVT::i1:
13125  case MVT::i8:
13126  // Scale == 1;
13127  break;
13128  case MVT::i16:
13129  // Scale == 2;
13130  Scale = 2;
13131  break;
13132  case MVT::i32:
13133  // Scale == 4;
13134  Scale = 4;
13135  break;
13136  }
13137 
13138  if ((V & (Scale - 1)) != 0)
13139  return false;
13140  V /= Scale;
13141  return V == (V & ((1LL << 5) - 1));
13142 }
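// Illustrative behavior of isLegalT1AddressImmediate (assumed values): for
// MVT::i32 the legal offsets are 0, 4, ..., 124 (a 5-bit immediate scaled by
// the access size), so 124 is accepted, 126 fails the alignment check, and
// 128 is out of range.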
13143 
13144 static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
13145  const ARMSubtarget *Subtarget) {
13146  bool isNeg = false;
13147  if (V < 0) {
13148  isNeg = true;
13149  V = - V;
13150  }
13151 
13152  switch (VT.getSimpleVT().SimpleTy) {
13153  default: return false;
13154  case MVT::i1:
13155  case MVT::i8:
13156  case MVT::i16:
13157  case MVT::i32:
13158  // + imm12 or - imm8
13159  if (isNeg)
13160  return V == (V & ((1LL << 8) - 1));
13161  return V == (V & ((1LL << 12) - 1));
13162  case MVT::f32:
13163  case MVT::f64:
13164  // Same as ARM mode. FIXME: NEON?
13165  if (!Subtarget->hasVFP2())
13166  return false;
13167  if ((V & 3) != 0)
13168  return false;
13169  V >>= 2;
13170  return V == (V & ((1LL << 8) - 1));
13171  }
13172 }
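// Illustrative behavior of isLegalT2AddressImmediate (assumed values): for an
// i32 access, +4095 (imm12) and -255 (imm8) are legal but -300 is not; f32/f64
// offsets must be multiples of 4 within +/-1020 (imm8 scaled by 4).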
13173 
13174 /// isLegalAddressImmediate - Return true if the integer value can be used
13175 /// as the offset of the target addressing mode for load / store of the
13176 /// given type.
13177 static bool isLegalAddressImmediate(int64_t V, EVT VT,
13178  const ARMSubtarget *Subtarget) {
13179  if (V == 0)
13180  return true;
13181 
13182  if (!VT.isSimple())
13183  return false;
13184 
13185  if (Subtarget->isThumb1Only())
13186  return isLegalT1AddressImmediate(V, VT);
13187  else if (Subtarget->isThumb2())
13188  return isLegalT2AddressImmediate(V, VT, Subtarget);
13189 
13190  // ARM mode.
13191  if (V < 0)
13192  V = - V;
13193  switch (VT.getSimpleVT().SimpleTy) {
13194  default: return false;
13195  case MVT::i1:
13196  case MVT::i8:
13197  case MVT::i32:
13198  // +- imm12
13199  return V == (V & ((1LL << 12) - 1));
13200  case MVT::i16:
13201  // +- imm8
13202  return V == (V & ((1LL << 8) - 1));
13203  case MVT::f32:
13204  case MVT::f64:
13205  if (!Subtarget->hasVFP2()) // FIXME: NEON?
13206  return false;
13207  if ((V & 3) != 0)
13208  return false;
13209  V >>= 2;
13210  return V == (V & ((1LL << 8) - 1));
13211  }
13212 }
13213 
13214 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
13215  EVT VT) const {
13216  int Scale = AM.Scale;
13217  if (Scale < 0)
13218  return false;
13219 
13220  switch (VT.getSimpleVT().SimpleTy) {
13221  default: return false;
13222  case MVT::i1:
13223  case MVT::i8:
13224  case MVT::i16:
13225  case MVT::i32:
13226  if (Scale == 1)
13227  return true;
13228  // r + r << imm
13229  Scale = Scale & ~1;
13230  return Scale == 2 || Scale == 4 || Scale == 8;
13231  case MVT::i64:
13232  // FIXME: What are we trying to model here? ldrd doesn't have an r + r
13233  // version in Thumb mode.
13234  // r + r
13235  if (Scale == 1)
13236  return true;
13237  // r * 2 (this can be lowered to r + r).
13238  if (!AM.HasBaseReg && Scale == 2)
13239  return true;
13240  return false;
13241  case MVT::isVoid:
13242  // Note, we allow "void" uses (basically, uses that aren't loads or
13243  // stores), because arm allows folding a scale into many arithmetic
13244  // operations. This should be made more precise and revisited later.
13245 
13246  // Allow r << imm, but the imm has to be a multiple of two.
13247  if (Scale & 1) return false;
13248  return isPowerOf2_32(Scale);
13249  }
13250 }
13251 
13252 bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,
13253  EVT VT) const {
13254  const int Scale = AM.Scale;
13255 
13256  // Negative scales are not supported in Thumb1.
13257  if (Scale < 0)
13258  return false;
13259 
13260  // Thumb1 addressing modes do not support register scaling except in the
13261  // following cases:
13262  // 1. Scale == 1 means no scaling.
13263  // 2. Scale == 2 this can be lowered to r + r if there is no base register.
13264  return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
13265 }
13266 
13267 /// isLegalAddressingMode - Return true if the addressing mode represented
13268 /// by AM is legal for this target, for a load/store of the specified type.
13269 bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
13270  const AddrMode &AM, Type *Ty,
13271  unsigned AS, Instruction *I) const {
13272  EVT VT = getValueType(DL, Ty, true);
13273  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
13274  return false;
13275 
13276  // Can never fold addr of global into load/store.
13277  if (AM.BaseGV)
13278  return false;
13279 
13280  switch (AM.Scale) {
13281  case 0: // no scale reg, must be "r+i" or "r", or "i".
13282  break;
13283  default:
13284  // ARM doesn't support any R+R*scale+imm addr modes.
13285  if (AM.BaseOffs)
13286  return false;
13287 
13288  if (!VT.isSimple())
13289  return false;
13290 
13291  if (Subtarget->isThumb1Only())
13292  return isLegalT1ScaledAddressingMode(AM, VT);
13293 
13294  if (Subtarget->isThumb2())
13295  return isLegalT2ScaledAddressingMode(AM, VT);
13296 
13297  int Scale = AM.Scale;
13298  switch (VT.getSimpleVT().SimpleTy) {
13299  default: return false;
13300  case MVT::i1:
13301  case MVT::i8:
13302  case MVT::i32:
13303  if (Scale < 0) Scale = -Scale;
13304  if (Scale == 1)
13305  return true;
13306  // r + r << imm
13307  return isPowerOf2_32(Scale & ~1);
13308  case MVT::i16:
13309  case MVT::i64:
13310  // r +/- r
13311  if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
13312  return true;
13313  // r * 2 (this can be lowered to r + r).
13314  if (!AM.HasBaseReg && Scale == 2)
13315  return true;
13316  return false;
13317 
13318  case MVT::isVoid:
13319  // Note, we allow "void" uses (basically, uses that aren't loads or
13320  // stores), because arm allows folding a scale into many arithmetic
13321  // operations. This should be made more precise and revisited later.
13322 
13323  // Allow r << imm, but the imm has to be a multiple of two.
13324  if (Scale & 1) return false;
13325  return isPowerOf2_32(Scale);
13326  }
13327  }
13328  return true;
13329 }
13330 
13331 /// isLegalICmpImmediate - Return true if the specified immediate is legal
13332 /// icmp immediate, that is the target has icmp instructions which can compare
13333 /// a register against the immediate without having to materialize the
13334 /// immediate into a register.
13335 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
13336  // Thumb2 and ARM modes can use cmn for negative immediates.
13337  if (!Subtarget->isThumb())
13338  return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
13339  ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
13340  if (Subtarget->isThumb2())
13341  return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
13342  ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
13343  // Thumb1 doesn't have cmn, and only 8-bit immediates.
13344  return Imm >= 0 && Imm <= 255;
13345 }
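// Illustrative (assumed values): cmp r0, #-5 has no modified-immediate
// encoding, but the negated value 5 does, so the comparison can be emitted
// as cmn r0, #5 in ARM and Thumb2 modes.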
13346 
13347 /// isLegalAddImmediate - Return true if the specified immediate is a legal add
13348 /// *or sub* immediate, that is the target has add or sub instructions which can
13349 /// add a register with the immediate without having to materialize the
13350 /// immediate into a register.
13351 bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
13352  // Same encoding for add/sub, just flip the sign.
13353  int64_t AbsImm = std::abs(Imm);
13354  if (!Subtarget->isThumb())
13355  return ARM_AM::getSOImmVal(AbsImm) != -1;
13356  if (Subtarget->isThumb2())
13357  return ARM_AM::getT2SOImmVal(AbsImm) != -1;
13358  // Thumb1 only has 8-bit unsigned immediate.
13359  return AbsImm >= 0 && AbsImm <= 255;
13360 }
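// Illustrative (assumed values): #0xFF00 is a legal ARM-mode add immediate
// (0xFF rotated), and because add and sub share the encoding check, only
// std::abs(Imm) needs to be tested.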
13361 
13362 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
13363  bool isSEXTLoad, SDValue &Base,
13364  SDValue &Offset, bool &isInc,
13365  SelectionDAG &DAG) {
13366  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
13367  return false;
13368 
13369  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
13370  // AddressingMode 3
13371  Base = Ptr->getOperand(0);
13372  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
13373  int RHSC = (int)RHS->getZExtValue();
13374  if (RHSC < 0 && RHSC > -256) {
13375  assert(Ptr->getOpcode() == ISD::ADD);
13376  isInc = false;
13377  Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
13378  return true;
13379  }
13380  }
13381  isInc = (Ptr->getOpcode() == ISD::ADD);
13382  Offset = Ptr->getOperand(1);
13383  return true;
13384  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
13385  // AddressingMode 2
13386  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
13387  int RHSC = (int)RHS->getZExtValue();
13388  if (RHSC < 0 && RHSC > -0x1000) {
13389  assert(Ptr->getOpcode() == ISD::ADD);
13390  isInc = false;
13391  Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
13392  Base = Ptr->getOperand(0);
13393  return true;
13394  }
13395  }
13396 
13397  if (Ptr->getOpcode() == ISD::ADD) {
13398  isInc = true;
13399  ARM_AM::ShiftOpc ShOpcVal=
13400  ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
13401  if (ShOpcVal != ARM_AM::no_shift) {
13402  Base = Ptr->getOperand(1);
13403  Offset = Ptr->getOperand(0);
13404  } else {
13405  Base = Ptr->getOperand(0);
13406  Offset = Ptr->getOperand(1);
13407  }
13408  return true;
13409  }
13410 
13411  isInc = (Ptr->getOpcode() == ISD::ADD);
13412  Base = Ptr->getOperand(0);
13413  Offset = Ptr->getOperand(1);
13414  return true;
13415  }
13416 
13417  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
13418  return false;
13419 }
13420 
13421 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
13422  bool isSEXTLoad, SDValue &Base,
13423  SDValue &Offset, bool &isInc,
13424  SelectionDAG &DAG) {
13425  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
13426  return false;
13427 
13428  Base = Ptr->getOperand(0);
13429  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
13430  int RHSC = (int)RHS->getZExtValue();
13431  if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
13432  assert(Ptr->getOpcode() == ISD::ADD);
13433  isInc = false;
13434  Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
13435  return true;
13436  } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
13437  isInc = Ptr->getOpcode() == ISD::ADD;
13438  Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
13439  return true;
13440  }
13441  }
13442 
13443  return false;
13444 }
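// Illustrative (assumed values): a pointer update (add p, 4) is matched with
// isInc == true, enabling a post-indexed form such as ldr r0, [r1], #4;
// offsets outside the 8-bit range, e.g. 260, are rejected.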
13445 
13446 /// getPreIndexedAddressParts - returns true by value, base pointer and
13447 /// offset pointer and addressing mode by reference if the node's address
13448 /// can be legally represented as pre-indexed load / store address.
13449 bool
13450 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
13451  SDValue &Offset,
13452  ISD::MemIndexedMode &AM,
13453  SelectionDAG &DAG) const {
13454  if (Subtarget->isThumb1Only())
13455  return false;
13456 
13457  EVT VT;
13458  SDValue Ptr;
13459  bool isSEXTLoad = false;
13460  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
13461  Ptr = LD->getBasePtr();
13462  VT = LD->getMemoryVT();
13463  isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
13464  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
13465  Ptr = ST->getBasePtr();
13466  VT = ST->getMemoryVT();
13467  } else
13468  return false;
13469 
13470  bool isInc;
13471  bool isLegal = false;
13472  if (Subtarget->isThumb2())
13473  isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
13474  Offset, isInc, DAG);
13475  else
13476  isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
13477  Offset, isInc, DAG);
13478  if (!isLegal)
13479  return false;
13480 
13481  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
13482  return true;
13483 }
13484 
13485 /// getPostIndexedAddressParts - returns true by value, base pointer and
13486 /// offset pointer and addressing mode by reference if this node can be
13487 /// combined with a load / store to form a post-indexed load / store.
13488 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
13489  SDValue &Base,
13490  SDValue &Offset,
13491  ISD::MemIndexedMode &AM,
13492  SelectionDAG &DAG) const {
13493  EVT VT;
13494  SDValue Ptr;
13495  bool isSEXTLoad = false, isNonExt;
13496  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
13497  VT = LD->getMemoryVT();
13498  Ptr = LD->getBasePtr();
13499  isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
13500  isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
13501  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
13502  VT = ST->getMemoryVT();
13503  Ptr = ST->getBasePtr();
13504  isNonExt = !ST->isTruncatingStore();
13505  } else
13506  return false;
13507 
13508  if (Subtarget->isThumb1Only()) {
13509  // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
13510  // must be non-extending/truncating, i32, with an offset of 4.
13511  assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
13512  if (Op->getOpcode() != ISD::ADD || !isNonExt)
13513  return false;
13514  auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
13515  if (!RHS || RHS->getZExtValue() != 4)
13516  return false;
13517 
13518  Offset = Op->getOperand(1);
13519  Base = Op->getOperand(0);
13520  AM = ISD::POST_INC;
13521  return true;
13522  }
13523 
13524  bool isInc;
13525  bool isLegal = false;
13526  if (Subtarget->isThumb2())
13527  isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
13528  isInc, DAG);
13529  else
13530  isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
13531  isInc, DAG);
13532  if (!isLegal)
13533  return false;
13534 
13535  if (Ptr != Base) {
13536  // Swap base ptr and offset to catch more post-index load / store when
13537  // it's legal. In Thumb2 mode, offset must be an immediate.
13538  if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
13539  !Subtarget->isThumb2())
13540  std::swap(Base, Offset);
13541 
13542  // Post-indexed load / store update the base pointer.
13543  if (Ptr != Base)
13544  return false;
13545  }
13546 
13547  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
13548  return true;
13549 }
13550 
13551 void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
13552  KnownBits &Known,
13553  const APInt &DemandedElts,
13554  const SelectionDAG &DAG,
13555  unsigned Depth) const {
13556  unsigned BitWidth = Known.getBitWidth();
13557  Known.resetAll();
13558  switch (Op.getOpcode()) {
13559  default: break;
13560  case ARMISD::ADDC:
13561  case ARMISD::ADDE:
13562  case ARMISD::SUBC:
13563  case ARMISD::SUBE:
13564  // Special cases when we convert a carry to a boolean.
13565  if (Op.getResNo() == 0) {
13566  SDValue LHS = Op.getOperand(0);
13567  SDValue RHS = Op.getOperand(1);
13568  // (ADDE 0, 0, C) will give us a single bit.
13569  if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
13570  isNullConstant(RHS)) {
13571  Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
13572  return;
13573  }
13574  }
13575  break;
13576  case ARMISD::CMOV: {
13577  // Bits are known zero/one if known on the LHS and RHS.
13578  Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
13579  if (Known.isUnknown())
13580  return;
13581 
13582  KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
13583  Known.Zero &= KnownRHS.Zero;
13584  Known.One &= KnownRHS.One;
13585  return;
13586  }
13587  case ISD::INTRINSIC_W_CHAIN: {
13588  ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
13589  Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
13590  switch (IntID) {
13591  default: return;
13592  case Intrinsic::arm_ldaex:
13593  case Intrinsic::arm_ldrex: {
13594  EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
13595  unsigned MemBits = VT.getScalarSizeInBits();
13596  Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
13597  return;
13598  }
13599  }
13600  }
13601  case ARMISD::BFI: {
13602  // Conservatively, we can recurse down the first operand
13603  // and just mask out all affected bits.
13604  Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
13605 
13606  // The operand to BFI is already a mask suitable for removing the bits it
13607  // sets.
13608  ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
13609  const APInt &Mask = CI->getAPIntValue();
13610  Known.Zero &= Mask;
13611  Known.One &= Mask;
13612  return;
13613  }
13614  case ARMISD::VGETLANEs:
13615  case ARMISD::VGETLANEu: {
13616  const SDValue &SrcSV = Op.getOperand(0);
13617  EVT VecVT = SrcSV.getValueType();
13618  assert(VecVT.isVector() && "VGETLANE expected a vector type");
13619  const unsigned NumSrcElts = VecVT.getVectorNumElements();
13620  ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
13621  assert(Pos->getAPIntValue().ult(NumSrcElts) &&
13622  "VGETLANE index out of bounds");
13623  unsigned Idx = Pos->getZExtValue();
13624  APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
13625  Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
13626 
13627  EVT VT = Op.getValueType();
13628  const unsigned DstSz = VT.getScalarSizeInBits();
13629  const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
13630  assert(SrcSz == Known.getBitWidth());
13631  assert(DstSz > SrcSz);
13632  if (Op.getOpcode() == ARMISD::VGETLANEs)
13633  Known = Known.sext(DstSz);
13634  else {
13635  Known = Known.zext(DstSz);
13636  Known.Zero.setBitsFrom(SrcSz);
13637  }
13638  assert(DstSz == Known.getBitWidth());
13639  break;
13640  }
13641  }
13642 }
13643 
13644 bool
13645 ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op,
13646  const APInt &DemandedAPInt,
13647  TargetLoweringOpt &TLO) const {
13648  // Delay optimization, so we don't have to deal with illegal types, or block
13649  // optimizations.
13650  if (!TLO.LegalOps)
13651  return false;
13652 
13653  // Only optimize AND for now.
13654  if (Op.getOpcode() != ISD::AND)
13655  return false;
13656 
13657  EVT VT = Op.getValueType();
13658 
13659  // Ignore vectors.
13660  if (VT.isVector())
13661  return false;
13662 
13663  assert(VT == MVT::i32 && "Unexpected integer type");
13664 
13665  // Make sure the RHS really is a constant.
13666  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
13667  if (!C)
13668  return false;
13669 
13670  unsigned Mask = C->getZExtValue();
13671 
13672  unsigned Demanded = DemandedAPInt.getZExtValue();
13673  unsigned ShrunkMask = Mask & Demanded;
13674  unsigned ExpandedMask = Mask | ~Demanded;
13675 
13676  // If the mask is all zeros, let the target-independent code replace the
13677  // result with zero.
13678  if (ShrunkMask == 0)
13679  return false;
13680 
13681  // If the mask is all ones, erase the AND. (Currently, the target-independent
13682  // code won't do this, so we have to do it explicitly to avoid an infinite
13683  // loop in obscure cases.)
13684  if (ExpandedMask == ~0U)
13685  return TLO.CombineTo(Op, Op.getOperand(0));
13686 
13687  auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
13688  return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
13689  };
13690  auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
13691  if (NewMask == Mask)
13692  return true;
13693  SDLoc DL(Op);
13694  SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
13695  SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
13696  return TLO.CombineTo(Op, NewOp);
13697  };
13698 
13699  // Prefer uxtb mask.
13700  if (IsLegalMask(0xFF))
13701  return UseMask(0xFF);
13702 
13703  // Prefer uxth mask.
13704  if (IsLegalMask(0xFFFF))
13705  return UseMask(0xFFFF);
13706 
13707  // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
13708  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
13709  if (ShrunkMask < 256)
13710  return UseMask(ShrunkMask);
13711 
13712  // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
13713  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
13714  if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
13715  return UseMask(ExpandedMask);
13716 
13717  // Potential improvements:
13718  //
13719  // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
13720  // We could try to prefer Thumb1 immediates which can be lowered to a
13721  // two-instruction sequence.
13722  // We could try to recognize more legal ARM/Thumb2 immediates here.
13723 
13724  return false;
13725 }
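// Illustrative (assumed values): for "and r0, r0, #0x1FF" where only the low
// 8 bits are demanded, ShrunkMask becomes 0xFF, so the AND is rewritten with
// mask 0xFF and can later be selected as a single uxtb.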
13726 
13727 
13728 //===----------------------------------------------------------------------===//
13729 // ARM Inline Assembly Support
13730 //===----------------------------------------------------------------------===//
13731 
13732 bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
13733  // Looking for "rev" which is V6+.
13734  if (!Subtarget->hasV6Ops())
13735  return false;
13736 
13737  InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
13738  std::string AsmStr = IA->getAsmString();
13739  SmallVector<StringRef, 4> AsmPieces;
13740  SplitString(AsmStr, AsmPieces, ";\n");
13741 
13742  switch (AsmPieces.size()) {
13743  default: return false;
13744  case 1:
13745  AsmStr = AsmPieces[0];
13746  AsmPieces.clear();
13747  SplitString(AsmStr, AsmPieces, " \t,");
13748 
13749  // rev $0, $1
13750  if (AsmPieces.size() == 3 &&
13751  AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
13752  IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
13753  IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
13754  if (Ty && Ty->getBitWidth() == 32)
13755  return IntrinsicLowering::LowerToByteSwap(CI);
13756  }
13757  break;
13758  }
13759 
13760  return false;
13761 }
13762 
13763 const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
13764  // At this point, we have to lower this constraint to something else, so we
13765  // lower it to an "r" or "w". However, by doing this we will force the result
13766  // to be in register, while the X constraint is much more permissive.
13767  //
13768  // Although we are correct (we are free to emit anything, without
13769  // constraints), we might break use cases that would expect us to be more
13770  // efficient and emit something else.
13771  if (!Subtarget->hasVFP2())
13772  return "r";
13773  if (ConstraintVT.isFloatingPoint())
13774  return "w";
13775  if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
13776  (ConstraintVT.getSizeInBits() == 64 ||
13777  ConstraintVT.getSizeInBits() == 128))
13778  return "w";
13779 
13780  return "r";
13781 }
13782 
13783 /// getConstraintType - Given a constraint letter, return the type of
13784 /// constraint it is for this target.
13785 ARMTargetLowering::ConstraintType
13786 ARMTargetLowering::getConstraintType(StringRef Constraint) const {
13787  if (Constraint.size() == 1) {
13788  switch (Constraint[0]) {
13789  default: break;
13790  case 'l': return C_RegisterClass;
13791  case 'w': return C_RegisterClass;
13792  case 'h': return C_RegisterClass;
13793  case 'x': return C_RegisterClass;
13794  case 't': return C_RegisterClass;
13795  case 'j': return C_Other; // Constant for movw.
13796  // An address with a single base register. Due to the way we
13797  // currently handle addresses it is the same as an 'r' memory constraint.
13798  case 'Q': return C_Memory;
13799  }
13800  } else if (Constraint.size() == 2) {
13801  switch (Constraint[0]) {
13802  default: break;
13803  // All 'U+' constraints are addresses.
13804  case 'U': return C_Memory;
13805  }
13806  }
13807  return TargetLowering::getConstraintType(Constraint);
13808 }
13809 
13810 /// Examine constraint type and operand type and determine a weight value.
13811 /// This object must already have been set up with the operand type
13812 /// and the current alternative constraint selected.
13813 TargetLowering::ConstraintWeight
13814 ARMTargetLowering::getSingleConstraintMatchWeight(
13815  AsmOperandInfo &info, const char *constraint) const {
13816  ConstraintWeight weight = CW_Invalid;
13817  Value *CallOperandVal = info.CallOperandVal;
13818  // If we don't have a value, we can't do a match,
13819  // but allow it at the lowest weight.
13820  if (!CallOperandVal)
13821  return CW_Default;
13822  Type *type = CallOperandVal->getType();
13823  // Look at the constraint type.
13824  switch (*constraint) {
13825  default:
13826  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
13827  break;
13828  case 'l':
13829  if (type->isIntegerTy()) {
13830  if (Subtarget->isThumb())
13831  weight = CW_SpecificReg;
13832  else
13833  weight = CW_Register;
13834  }
13835  break;
13836  case 'w':
13837  if (type->isFloatingPointTy())
13838  weight = CW_Register;
13839  break;
13840  }
13841  return weight;
13842 }
13843 
13844 using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
13845 
13846 RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
13847  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
13848  if (Constraint.size() == 1) {
13849  // GCC ARM Constraint Letters
13850  switch (Constraint[0]) {
13851  case 'l': // Low regs or general regs.
13852  if (Subtarget->isThumb())
13853  return RCPair(0U, &ARM::tGPRRegClass);
13854  return RCPair(0U, &ARM::GPRRegClass);
13855  case 'h': // High regs or no regs.
13856  if (Subtarget->isThumb())
13857  return RCPair(0U, &ARM::hGPRRegClass);
13858  break;
13859  case 'r':
13860  if (Subtarget->isThumb1Only())
13861  return RCPair(0U, &ARM::tGPRRegClass);
13862  return RCPair(0U, &ARM::GPRRegClass);
13863  case 'w':
13864  if (VT == MVT::Other)
13865  break;
13866  if (VT == MVT::f32)
13867  return RCPair(0U, &ARM::SPRRegClass);
13868  if (VT.getSizeInBits() == 64)
13869  return RCPair(0U, &ARM::DPRRegClass);
13870  if (VT.getSizeInBits() == 128)
13871  return RCPair(0U, &ARM::QPRRegClass);
13872  break;
13873  case 'x':
13874  if (VT == MVT::Other)
13875  break;
13876  if (VT == MVT::f32)
13877  return RCPair(0U, &ARM::SPR_8RegClass);
13878  if (VT.getSizeInBits() == 64)
13879  return RCPair(0U, &ARM::DPR_8RegClass);
13880  if (VT.getSizeInBits() == 128)
13881  return RCPair(0U, &ARM::QPR_8RegClass);
13882  break;
13883  case 't':
13884  if (VT == MVT::Other)
13885  break;
13886  if (VT == MVT::f32 || VT == MVT::i32)
13887  return RCPair(0U, &ARM::SPRRegClass);
13888  if (VT.getSizeInBits() == 64)
13889  return RCPair(0U, &ARM::DPR_VFP2RegClass);
13890  if (VT.getSizeInBits() == 128)
13891  return RCPair(0U, &ARM::QPR_VFP2RegClass);
13892  break;
13893  }
13894  }
13895  if (StringRef("{cc}").equals_lower(Constraint))
13896  return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
13897 
13898  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
13899 }
13900 
13901 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
13902 /// vector. If it is invalid, don't add anything to Ops.
13903 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
13904  std::string &Constraint,
13905  std::vector<SDValue>&Ops,
13906  SelectionDAG &DAG) const {
13907  SDValue Result;
13908 
13909  // Currently only support length 1 constraints.
13910  if (Constraint.length() != 1) return;
13911 
13912  char ConstraintLetter = Constraint[0];
13913  switch (ConstraintLetter) {
13914  default: break;
13915  case 'j':
13916  case 'I': case 'J': case 'K': case 'L':
13917  case 'M': case 'N': case 'O':
13918  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
13919  if (!C)
13920  return;
13921 
13922  int64_t CVal64 = C->getSExtValue();
13923  int CVal = (int) CVal64;
13924  // None of these constraints allow values larger than 32 bits. Check
13925  // that the value fits in an int.
13926  if (CVal != CVal64)
13927  return;
13928 
13929  switch (ConstraintLetter) {
13930  case 'j':
13931  // Constant suitable for movw, must be between 0 and
13932  // 65535.
13933  if (Subtarget->hasV6T2Ops())
13934  if (CVal >= 0 && CVal <= 65535)
13935  break;
13936  return;
13937  case 'I':
13938  if (Subtarget->isThumb1Only()) {
13939  // This must be a constant between 0 and 255, for ADD
13940  // immediates.
13941  if (CVal >= 0 && CVal <= 255)
13942  break;
13943  } else if (Subtarget->isThumb2()) {
13944  // A constant that can be used as an immediate value in a
13945  // data-processing instruction.
13946  if (ARM_AM::getT2SOImmVal(CVal) != -1)
13947  break;
13948  } else {
13949  // A constant that can be used as an immediate value in a
13950  // data-processing instruction.
13951  if (ARM_AM::getSOImmVal(CVal) != -1)
13952  break;
13953  }
13954  return;
13955 
13956  case 'J':
13957  if (Subtarget->isThumb1Only()) {
13958  // This must be a constant between -255 and -1, for negated ADD
13959  // immediates. This can be used in GCC with an "n" modifier that
13960  // prints the negated value, for use with SUB instructions. It is
13961  // not useful otherwise but is implemented for compatibility.
13962  if (CVal >= -255 && CVal <= -1)
13963  break;
13964  } else {
13965  // This must be a constant between -4095 and 4095. It is not clear
13966  // what this constraint is intended for. Implemented for
13967  // compatibility with GCC.
13968  if (CVal >= -4095 && CVal <= 4095)
13969  break;
13970  }
13971  return;
13972 
13973  case 'K':
13974  if (Subtarget->isThumb1Only()) {
13975  // A 32-bit value where only one byte has a nonzero value. Exclude
13976  // zero to match GCC. This constraint is used by GCC internally for
13977  // constants that can be loaded with a move/shift combination.
13978  // It is not useful otherwise but is implemented for compatibility.
13979  if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
13980  break;
13981  } else if (Subtarget->isThumb2()) {
13982  // A constant whose bitwise inverse can be used as an immediate
13983  // value in a data-processing instruction. This can be used in GCC
13984  // with a "B" modifier that prints the inverted value, for use with
13985  // BIC and MVN instructions. It is not useful otherwise but is
13986  // implemented for compatibility.
13987  if (ARM_AM::getT2SOImmVal(~CVal) != -1)
13988  break;
13989  } else {
13990  // A constant whose bitwise inverse can be used as an immediate
13991  // value in a data-processing instruction. This can be used in GCC
13992  // with a "B" modifier that prints the inverted value, for use with
13993  // BIC and MVN instructions. It is not useful otherwise but is
13994  // implemented for compatibility.
13995  if (ARM_AM::getSOImmVal(~CVal) != -1)
13996  break;
13997  }
13998  return;
13999 
14000  case 'L':
14001  if (Subtarget->isThumb1Only()) {
14002  // This must be a constant between -7 and 7,
14003  // for 3-operand ADD/SUB immediate instructions.
14004  if (CVal >= -7 && CVal < 7)
14005  break;
14006  } else if (Subtarget->isThumb2()) {
14007  // A constant whose negation can be used as an immediate value in a
14008  // data-processing instruction. This can be used in GCC with an "n"
14009  // modifier that prints the negated value, for use with SUB
14010  // instructions. It is not useful otherwise but is implemented for
14011  // compatibility.
14012  if (ARM_AM::getT2SOImmVal(-CVal) != -1)
14013  break;
14014  } else {
14015  // A constant whose negation can be used as an immediate value in a
14016  // data-processing instruction. This can be used in GCC with an "n"
14017  // modifier that prints the negated value, for use with SUB
14018  // instructions. It is not useful otherwise but is implemented for
14019  // compatibility.
14020  if (ARM_AM::getSOImmVal(-CVal) != -1)
14021  break;
14022  }
14023  return;
14024 
14025  case 'M':
14026  if (Subtarget->isThumb1Only()) {
14027  // This must be a multiple of 4 between 0 and 1020, for
14028  // ADD sp + immediate.
14029  if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
14030  break;
14031  } else {
14032  // A power of two or a constant between 0 and 32. This is used in
14033  // GCC for the shift amount on shifted register operands, but it is
14034  // useful in general for any shift amounts.
14035  if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
14036  break;
14037  }
14038  return;
14039 
14040  case 'N':
14041  if (Subtarget->isThumb()) { // FIXME thumb2
14042  // This must be a constant between 0 and 31, for shift amounts.
14043  if (CVal >= 0 && CVal <= 31)
14044  break;
14045  }
14046  return;
14047 
14048  case 'O':
14049  if (Subtarget->isThumb()) { // FIXME thumb2
14050  // This must be a multiple of 4 between -508 and 508, for
14051  // ADD/SUB sp = sp + immediate.
14052  if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
14053  break;
14054  }
14055  return;
14056  }
14057  Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
14058  break;
14059  }
14060 
14061  if (Result.getNode()) {
14062  Ops.push_back(Result);
14063  return;
14064  }
14065  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
14066 }
14067 
14068 static RTLIB::Libcall getDivRemLibcall(
14069  const SDNode *N, MVT::SimpleValueType SVT) {
14070  assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
14071  N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
14072  "Unhandled Opcode in getDivRemLibcall");
14073  bool isSigned = N->getOpcode() == ISD::SDIVREM ||
14074  N->getOpcode() == ISD::SREM;
14075  RTLIB::Libcall LC;
14076  switch (SVT) {
14077  default: llvm_unreachable("Unexpected request for libcall!");
14078  case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
14079  case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
14080  case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
14081  case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
14082  }
14083  return LC;
14084 }
14085 
14086 static TargetLowering::ArgListTy getDivRemArgList(
14087  const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
14088  assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
14089  N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
14090  "Unhandled Opcode in getDivRemArgList");
14091  bool isSigned = N->getOpcode() == ISD::SDIVREM ||
14092  N->getOpcode() == ISD::SREM;
14093  TargetLowering::ArgListTy Args;
14094  TargetLowering::ArgListEntry Entry;
14095  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
14096  EVT ArgVT = N->getOperand(i).getValueType();
14097  Type *ArgTy = ArgVT.getTypeForEVT(*Context);
14098  Entry.Node = N->getOperand(i);
14099  Entry.Ty = ArgTy;
14100  Entry.IsSExt = isSigned;
14101  Entry.IsZExt = !isSigned;
14102  Args.push_back(Entry);
14103  }
14104  if (Subtarget->isTargetWindows() && Args.size() >= 2)
14105  std::swap(Args[0], Args[1]);
14106  return Args;
14107 }
14108 
14109 SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
14110  assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
14111  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
14112  Subtarget->isTargetWindows()) &&
14113  "Register-based DivRem lowering only");
14114  unsigned Opcode = Op->getOpcode();
14115  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
14116  "Invalid opcode for Div/Rem lowering");
14117  bool isSigned = (Opcode == ISD::SDIVREM);
14118  EVT VT = Op->getValueType(0);
14119  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
14120  SDLoc dl(Op);
14121 
14122  // If the target has hardware divide, use divide + multiply + subtract:
14123  // div = a / b
14124  // rem = a - b * div
14125  // return {div, rem}
14126  // This should be lowered into UDIV/SDIV + MLS later on.
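  // e.g. (illustrative) on a core with hardware SDIV:
  //   sdiv r2, r0, r1       @ div = a / b
  //   mls  r0, r2, r1, r0   @ rem = a - div * b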
14127  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
14128  : Subtarget->hasDivideInARMMode();
14129  if (hasDivide && Op->getValueType(0).isSimple() &&
14130  Op->getSimpleValueType(0) == MVT::i32) {
14131  unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
14132  const SDValue Dividend = Op->getOperand(0);
14133  const SDValue Divisor = Op->getOperand(1);
14134  SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
14135  SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
14136  SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
14137 
14138  SDValue Values[2] = {Div, Rem};
14139  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
14140  }
14141 
14142  RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
14143  VT.getSimpleVT().SimpleTy);
14144  SDValue InChain = DAG.getEntryNode();
14145 
14146  TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
14147  DAG.getContext(),
14148  Subtarget);
14149 
14150  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
14151  getPointerTy(DAG.getDataLayout()));
14152 
14153  Type *RetTy = StructType::get(Ty, Ty);
14154 
14155  if (Subtarget->isTargetWindows())
14156  InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
14157 
14158  TargetLowering::CallLoweringInfo CLI(DAG);
14159  CLI.setDebugLoc(dl).setChain(InChain)
14160  .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
14161  .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
14162 
14163  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
14164  return CallInfo.first;
14165 }
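// Illustrative sketch of the two paths above: with hardware divide, a = 7,
// b = 3 yields div = 7 / 3 = 2 and rem = 7 - 3 * 2 = 1, later matched to
// SDIV/UDIV plus MLS. Without it, the AEABI helper (__aeabi_idivmod for
// signed i32) returns the {quotient, remainder} pair in r0/r1, which is why
// RetTy is built as a two-field struct.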
14166 
14167 // Lowers REM using divmod helpers
14168 // see RTABI section 4.2/4.3
14169 SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
14170  // Build return types (div and rem)
14171  std::vector<Type*> RetTyParams;
14172  Type *RetTyElement;
14173 
14174  switch (N->getValueType(0).getSimpleVT().SimpleTy) {
14175  default: llvm_unreachable("Unexpected request for libcall!");
14176  case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
14177  case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
14178  case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
14179  case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
14180  }
14181 
14182  RetTyParams.push_back(RetTyElement);
14183  RetTyParams.push_back(RetTyElement);
14184  ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
14185  Type *RetTy = StructType::get(*DAG.getContext(), ret);
14186 
14187  RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
14188  SimpleTy);
14189  SDValue InChain = DAG.getEntryNode();
14190  TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
14191  Subtarget);
14192  bool isSigned = N->getOpcode() == ISD::SREM;
14193  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
14194  getPointerTy(DAG.getDataLayout()));
14195 
14196  if (Subtarget->isTargetWindows())
14197  InChain = WinDBZCheckDenominator(DAG, N, InChain);
14198 
14199  // Lower call
14200  CallLoweringInfo CLI(DAG);
14201  CLI.setChain(InChain)
14202  .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
14203  .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
14204  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
14205 
14206  // Return second (rem) result operand (first contains div)
14207  SDNode *ResNode = CallResult.first.getNode();
14208  assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
14209  return ResNode->getOperand(1);
14210 }
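// E.g. (sketch) an i32 srem becomes the same __aeabi_idivmod call as above;
// the lowered call yields the {div, rem} pair and only the second result is
// consumed here, so an unused quotient can simply be dead-code eliminated.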
14211 
14212 SDValue
14213 ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
14214  assert(Subtarget->isTargetWindows() && "unsupported target platform");
14215  SDLoc DL(Op);
14216 
14217  // Get the inputs.
14218  SDValue Chain = Op.getOperand(0);
14219  SDValue Size = Op.getOperand(1);
14220 
14221  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
14222  "no-stack-arg-probe")) {
14223  unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
14224  SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
14225  Chain = SP.getValue(1);
14226  SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
14227  if (Align)
14228  SP = DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
14229  DAG.getConstant(-(uint64_t)Align, DL, MVT::i32));
14230  Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
14231  SDValue Ops[2] = { SP, Chain };
14232  return DAG.getMergeValues(Ops, DL);
14233  }
14234 
14235  SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
14236  DAG.getConstant(2, DL, MVT::i32));
14237 
14238  SDValue Flag;
14239  Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
14240  Flag = Chain.getValue(1);
14241 
14242  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
14243  Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
14244 
14245  SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
14246  Chain = NewSP.getValue(1);
14247 
14248  SDValue Ops[2] = { NewSP, Chain };
14249  return DAG.getMergeValues(Ops, DL);
14250 }
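// Sketch of the Windows protocol assumed above: the allocation size is
// passed to __chkstk in r4 as a count of words (hence the SRL by 2), the
// WIN__CHKSTK pseudo expands to the probing call plus the SP adjustment, and
// the resulting stack pointer is then read back from SP.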
14251 
14252 SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
14253  assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
14254  "Unexpected type for custom-lowering FP_EXTEND");
14255 
14256  RTLIB::Libcall LC;
14257  LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
14258 
14259  SDValue SrcVal = Op.getOperand(0);
14260  return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
14261  SDLoc(Op)).first;
14262 }
14263 
14264 SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
14265  assert(Op.getOperand(0).getValueType() == MVT::f64 &&
14266  Subtarget->isFPOnlySP() &&
14267  "Unexpected type for custom-lowering FP_ROUND");
14268 
14269  RTLIB::Libcall LC;
14270  LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
14271 
14272  SDValue SrcVal = Op.getOperand(0);
14273  return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
14274  SDLoc(Op)).first;
14275 }
14276 
14277 bool
14278 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
14279  // The ARM target isn't yet aware of offsets.
14280  return false;
14281 }
14282 
14283 bool ARM::isBitFieldInvertedMask(unsigned v) {
14284  if (v == 0xffffffff)
14285  return false;
14286 
14287  // there can be 1's on either or both "outsides", all the "inside"
14288  // bits must be 0's
14289  return isShiftedMask_32(~v);
14290 }
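// Worked example: v = 0xff0000ff has ones on both "outsides" and one
// contiguous run of zeros inside, so ~v = 0x00ffff00 is a shifted mask and
// the function returns true; v = 0xff00ff00 fails because its zero bits
// form two separate runs.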
14291 
14292 /// isFPImmLegal - Returns true if the target can instruction select the
14293 /// specified FP immediate natively. If false, the legalizer will
14294 /// materialize the FP immediate as a load from a constant pool.
14295 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
14296  if (!Subtarget->hasVFP3())
14297  return false;
14298  if (VT == MVT::f16 && Subtarget->hasFullFP16())
14299  return ARM_AM::getFP16Imm(Imm) != -1;
14300  if (VT == MVT::f32)
14301  return ARM_AM::getFP32Imm(Imm) != -1;
14302  if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
14303  return ARM_AM::getFP64Imm(Imm) != -1;
14304  return false;
14305 }
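// For instance (sketch): with VFP3, 1.0 and 0.5 fit the 8-bit VFP immediate
// encoding and can be materialized directly with VMOV, while a value such as
// 0.1 has no exact encoding and is loaded from the constant pool instead.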
14306 
14307 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
14308 /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
14309 /// specified in the intrinsic calls.
14310 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
14311  const CallInst &I,
14312  MachineFunction &MF,
14313  unsigned Intrinsic) const {
14314  switch (Intrinsic) {
14315  case Intrinsic::arm_neon_vld1:
14316  case Intrinsic::arm_neon_vld2:
14317  case Intrinsic::arm_neon_vld3:
14318  case Intrinsic::arm_neon_vld4:
14319  case Intrinsic::arm_neon_vld2lane:
14320  case Intrinsic::arm_neon_vld3lane:
14321  case Intrinsic::arm_neon_vld4lane:
14322  case Intrinsic::arm_neon_vld2dup:
14323  case Intrinsic::arm_neon_vld3dup:
14324  case Intrinsic::arm_neon_vld4dup: {
14325  Info.opc = ISD::INTRINSIC_W_CHAIN;
14326  // Conservatively set memVT to the entire set of vectors loaded.
14327  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
14328  uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
14329  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
14330  Info.ptrVal = I.getArgOperand(0);
14331  Info.offset = 0;
14332  Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
14333  Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
14334  // volatile loads with NEON intrinsics not supported
14335  Info.flags = MachineMemOperand::MOLoad;
14336  return true;
14337  }
14338  case Intrinsic::arm_neon_vld1x2:
14339  case Intrinsic::arm_neon_vld1x3:
14340  case Intrinsic::arm_neon_vld1x4: {
14341  Info.opc = ISD::INTRINSIC_W_CHAIN;
14342  // Conservatively set memVT to the entire set of vectors loaded.
14343  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
14344  uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
14345  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
14346  Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
14347  Info.offset = 0;
14348  Info.align = 0;
14349  // volatile loads with NEON intrinsics not supported
14350  Info.flags = MachineMemOperand::MOLoad;
14351  return true;
14352  }
14353  case Intrinsic::arm_neon_vst1:
14354  case Intrinsic::arm_neon_vst2:
14355  case Intrinsic::arm_neon_vst3:
14356  case Intrinsic::arm_neon_vst4:
14357  case Intrinsic::arm_neon_vst2lane:
14358  case Intrinsic::arm_neon_vst3lane:
14359  case Intrinsic::arm_neon_vst4lane: {
14360  Info.opc = ISD::INTRINSIC_VOID;
14361  // Conservatively set memVT to the entire set of vectors stored.
14362  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
14363  unsigned NumElts = 0;
14364  for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
14365  Type *ArgTy = I.getArgOperand(ArgI)->getType();
14366  if (!ArgTy->isVectorTy())
14367  break;
14368  NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
14369  }
14370  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
14371  Info.ptrVal = I.getArgOperand(0);
14372  Info.offset = 0;
14373  Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
14374  Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
14375  // volatile stores with NEON intrinsics not supported
14376  Info.flags = MachineMemOperand::MOStore;
14377  return true;
14378  }
14379  case Intrinsic::arm_neon_vst1x2:
14380  case Intrinsic::arm_neon_vst1x3:
14381  case Intrinsic::arm_neon_vst1x4: {
14382  Info.opc = ISD::INTRINSIC_VOID;
14383  // Conservatively set memVT to the entire set of vectors stored.
14384  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
14385  unsigned NumElts = 0;
14386  for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
14387  Type *ArgTy = I.getArgOperand(ArgI)->getType();
14388  if (!ArgTy->isVectorTy())
14389  break;
14390  NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
14391  }
14392  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
14393  Info.ptrVal = I.getArgOperand(0);
14394  Info.offset = 0;
14395  Info.align = 0;
14396  // volatile stores with NEON intrinsics not supported
14397  Info.flags = MachineMemOperand::MOStore;
14398  return true;
14399  }
14400  case Intrinsic::arm_ldaex:
14401  case Intrinsic::arm_ldrex: {
14402  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
14403  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
14404  Info.opc = ISD::INTRINSIC_W_CHAIN;
14405  Info.memVT = MVT::getVT(PtrTy->getElementType());
14406  Info.ptrVal = I.getArgOperand(0);
14407  Info.offset = 0;
14408  Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
14409  Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
14410  return true;
14411  }
14412  case Intrinsic::arm_stlex:
14413  case Intrinsic::arm_strex: {
14414  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
14415  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
14416  Info.opc = ISD::INTRINSIC_W_CHAIN;
14417  Info.memVT = MVT::getVT(PtrTy->getElementType());
14418  Info.ptrVal = I.getArgOperand(1);
14419  Info.offset = 0;
14420  Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
14421  Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
14422  return true;
14423  }
14424  case Intrinsic::arm_stlexd:
14425  case Intrinsic::arm_strexd:
14426  Info.opc = ISD::INTRINSIC_W_CHAIN;
14427  Info.memVT = MVT::i64;
14428  Info.ptrVal = I.getArgOperand(2);
14429  Info.offset = 0;
14430  Info.align = 8;
14431  Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
14432  return true;
14433 
14434  case Intrinsic::arm_ldaexd:
14435  case Intrinsic::arm_ldrexd:
14436  Info.opc = ISD::INTRINSIC_W_CHAIN;
14437  Info.memVT = MVT::i64;
14438  Info.ptrVal = I.getArgOperand(0);
14439  Info.offset = 0;
14440  Info.align = 8;
14441  Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
14442  return true;
14443 
14444  default:
14445  break;
14446  }
14447 
14448  return false;
14449 }
14450 
14451 /// Returns true if it is beneficial to convert a load of a constant
14452 /// to just the constant itself.
14453 bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
14454  Type *Ty) const {
14455  assert(Ty->isIntegerTy());
14456 
14457  unsigned Bits = Ty->getPrimitiveSizeInBits();
14458  if (Bits == 0 || Bits > 32)
14459  return false;
14460  return true;
14461 }
14462 
14463 bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
14464  unsigned Index) const {
14465  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
14466  return false;
14467 
14468  return (Index == 0 || Index == ResVT.getVectorNumElements());
14469 }
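// I.e. (sketch) extracting either half of a <4 x i32> as a <2 x i32> is
// cheap, since index 0 and index 2 line up with a subregister of the source
// register, whereas an extract starting at index 1 is not.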
14470 
14471 Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
14472  ARM_MB::MemBOpt Domain) const {
14473  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
14474 
14475  // First, if the target has no DMB, see what fallback we can use.
14476  if (!Subtarget->hasDataBarrier()) {
14477  // Some ARMv6 cpus can support data barriers with an mcr instruction.
14478  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
14479  // here.
14480  if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
14481  Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
14482  Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
14483  Builder.getInt32(0), Builder.getInt32(7),
14484  Builder.getInt32(10), Builder.getInt32(5)};
14485  return Builder.CreateCall(MCR, args);
14486  } else {
14487  // Instead of using barriers, atomic accesses on these subtargets use
14488  // libcalls.
14489  llvm_unreachable("makeDMB on a target so old that it has no barriers");
14490  }
14491  } else {
14492  Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
14493  // Only a full system barrier exists in the M-class architectures.
14494  Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
14495  Constant *CDomain = Builder.getInt32(Domain);
14496  return Builder.CreateCall(DMB, CDomain);
14497  }
14498 }
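// The six operands of the MCR fallback above encode the ARMv6 CP15 barrier,
// roughly (sketch): mcr p15, #0, rX, c7, c10, #5. On targets with a real
// barrier instruction, the else branch emits DMB with the requested domain
// instead.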
14499 
14500 // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
14501 Instruction *ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
14502  Instruction *Inst,
14503  AtomicOrdering Ord) const {
14504  switch (Ord) {
14505  case AtomicOrdering::NotAtomic:
14506  case AtomicOrdering::Unordered:
14507  llvm_unreachable("Invalid fence: unordered/non-atomic");
14508  case AtomicOrdering::Monotonic:
14509  case AtomicOrdering::Acquire:
14510  return nullptr; // Nothing to do
14511  case AtomicOrdering::SequentiallyConsistent:
14512  if (!Inst->hasAtomicStore())
14513  return nullptr; // Nothing to do
14514  LLVM_FALLTHROUGH;
14515  case AtomicOrdering::Release:
14516  case AtomicOrdering::AcquireRelease:
14517  if (Subtarget->preferISHSTBarriers())
14518  return makeDMB(Builder, ARM_MB::ISHST);
14519  // FIXME: add a comment with a link to documentation justifying this.
14520  else
14521  return makeDMB(Builder, ARM_MB::ISH);
14522  }
14523  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
14524 }
14525 
14526 Instruction *ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
14527  Instruction *Inst,
14528  AtomicOrdering Ord) const {
14529  switch (Ord) {
14530  case AtomicOrdering::NotAtomic:
14531  case AtomicOrdering::Unordered:
14532  llvm_unreachable("Invalid fence: unordered/not-atomic");
14533  case AtomicOrdering::Monotonic:
14534  case AtomicOrdering::Release:
14535  return nullptr; // Nothing to do
14536  case AtomicOrdering::Acquire:
14537  case AtomicOrdering::AcquireRelease:
14538  case AtomicOrdering::SequentiallyConsistent:
14539  return makeDMB(Builder, ARM_MB::ISH);
14540  }
14541  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
14542 }
14543 
14544 // Loads and stores less than 64-bits are already atomic; ones above that
14545 // are doomed anyway, so defer to the default libcall and blame the OS when
14546 // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
14547 // anything for those.
14548 bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
14549  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
14550  return (Size == 64) && !Subtarget->isMClass();
14551 }
14552 
14553 // Loads and stores less than 64-bits are already atomic; ones above that
14554 // are doomed anyway, so defer to the default libcall and blame the OS when
14555 // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
14556 // anything for those.
14557 // FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
14558 // guarantee, see DDI0406C ARM architecture reference manual,
14559 // sections A8.8.72-74 LDRD)
14560 TargetLowering::AtomicExpansionKind
14561 ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
14562  unsigned Size = LI->getType()->getPrimitiveSizeInBits();
14563  return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
14564  : AtomicExpansionKind::None;
14565 }
14566 
14567 // For the real atomic operations, we have ldrex/strex up to 32 bits,
14568 // and up to 64 bits on the non-M profiles
14569 TargetLowering::AtomicExpansionKind
14570 ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
14571  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
14572  bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
14573  return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
14574  ? AtomicExpansionKind::LLSC
14575  : AtomicExpansionKind::None;
14576 }
14577 
14578 TargetLowering::AtomicExpansionKind
14579 ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
14580  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
14581  // implement cmpxchg without spilling. If the address being exchanged is also
14582  // on the stack and close enough to the spill slot, this can lead to a
14583  // situation where the monitor always gets cleared and the atomic operation
14584  // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
14585  bool HasAtomicCmpXchg =
14586  !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
14587  if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg)
14588  return AtomicExpansionKind::LLSC;
14589  return AtomicExpansionKind::None;
14590 }
14591 
14592 bool ARMTargetLowering::shouldInsertFencesForAtomic(
14593  const Instruction *I) const {
14594  return InsertFencesForAtomic;
14595 }
14596 
14597 // This has so far only been implemented for MachO.
14598 bool ARMTargetLowering::useLoadStackGuardNode() const {
14599  return Subtarget->isTargetMachO();
14600 }
14601 
14602 bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
14603  unsigned &Cost) const {
14604  // If we do not have NEON, vector types are not natively supported.
14605  if (!Subtarget->hasNEON())
14606  return false;
14607 
14608  // Floating point values and vector values map to the same register file.
14609  // Therefore, although we could do a store extract of a vector type, this is
14610  // better to leave at float as we have more freedom in the addressing mode for
14611  // those.
14612  if (VectorTy->isFPOrFPVectorTy())
14613  return false;
14614 
14615  // If the index is unknown at compile time, this is very expensive to lower
14616  // and it is not possible to combine the store with the extract.
14617  if (!isa<ConstantInt>(Idx))
14618  return false;
14619 
14620  assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
14621  unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
14622  // We can do a store + vector extract on any vector that fits perfectly in a D
14623  // or Q register.
14624  if (BitWidth == 64 || BitWidth == 128) {
14625  Cost = 0;
14626  return true;
14627  }
14628  return false;
14629 }
14630 
14631 bool ARMTargetLowering::isCheapToSpeculateCttz() const {
14632  return Subtarget->hasV6T2Ops();
14633 }
14634 
14635 bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
14636  return Subtarget->hasV6T2Ops();
14637 }
14638 
14639 Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
14640  AtomicOrdering Ord) const {
14641  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
14642  Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
14643  bool IsAcquire = isAcquireOrStronger(Ord);
14644 
14645  // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
14646  // intrinsic must return {i32, i32} and we have to recombine them into a
14647  // single i64 here.
14648  if (ValTy->getPrimitiveSizeInBits() == 64) {
14649  Intrinsic::ID Int =
14650  IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
14651  Function *Ldrex = Intrinsic::getDeclaration(M, Int);
14652 
14653  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
14654  Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
14655 
14656  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
14657  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
14658  if (!Subtarget->isLittle())
14659  std::swap (Lo, Hi);
14660  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
14661  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
14662  return Builder.CreateOr(
14663  Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
14664  }
14665 
14666  Type *Tys[] = { Addr->getType() };
14667  Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
14668  Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
14669 
14670  return Builder.CreateTruncOrBitCast(
14671  Builder.CreateCall(Ldrex, Addr),
14672  cast<PointerType>(Addr->getType())->getElementType());
14673 }
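// IR shape produced for a 64-bit acquire load (sketch, value names invented
// for illustration):
//   %lohi = call { i32, i32 } @llvm.arm.ldaexd(i8* %addr)
//   %lo64 = zext i32 %lo to i64
//   %hi64 = zext i32 %hi to i64
//   %val64 = or i64 %lo64, (shl i64 %hi64, 32)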
14674 
14675 void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
14676  IRBuilder<> &Builder) const {
14677  if (!Subtarget->hasV7Ops())
14678  return;
14679  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
14680  Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
14681 }
14682 
14683 Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
14684  Value *Addr,
14685  AtomicOrdering Ord) const {
14686  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
14687  bool IsRelease = isReleaseOrStronger(Ord);
14688 
14689  // Since the intrinsics must have legal type, the i64 intrinsics take two
14690  // parameters: "i32, i32". We must marshal Val into the appropriate form
14691  // before the call.
14692  if (Val->getType()->getPrimitiveSizeInBits() == 64) {
14693  Intrinsic::ID Int =
14694  IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
14695  Function *Strex = Intrinsic::getDeclaration(M, Int);
14696  Type *Int32Ty = Type::getInt32Ty(M->getContext());
14697 
14698  Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
14699  Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
14700  if (!Subtarget->isLittle())
14701  std::swap(Lo, Hi);
14702  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
14703  return Builder.CreateCall(Strex, {Lo, Hi, Addr});
14704  }
14705 
14706  Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
14707  Type *Tys[] = { Addr->getType() };
14708  Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
14709 
14710  return Builder.CreateCall(
14711  Strex, {Builder.CreateZExtOrBitCast(
14712  Val, Strex->getFunctionType()->getParamType(0)),
14713  Addr});
14714 }
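// 64-bit release-store sketch: Val is split into i32 halves via trunc and
// lshr and handed to @llvm.arm.stlexd(%lo, %hi, %addr); as with strexd, the
// returned i32 status is 0 on success.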
14715 
14716 
14717 bool ARMTargetLowering::alignLoopsWithOptSize() const {
14718  return Subtarget->isMClass();
14719 }
14720 
14721 /// A helper function for determining the number of interleaved accesses we
14722 /// will generate when lowering accesses of the given type.
14723 unsigned
14724 ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
14725  const DataLayout &DL) const {
14726  return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
14727 }
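// E.g. a 128-bit <4 x i32> needs (128 + 127) / 128 = 1 access, while a
// 256-bit <8 x i32> needs 2.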
14728 
14729 bool ARMTargetLowering::isLegalInterleavedAccessType(
14730  VectorType *VecTy, const DataLayout &DL) const {
14731 
14732  unsigned VecSize = DL.getTypeSizeInBits(VecTy);
14733  unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
14734 
14735  // Ensure the vector doesn't have f16 elements. Even though we could do an
14736  // i16 vldN, we can't hold the f16 vectors and will end up converting via
14737  // f32.
14738  if (VecTy->getElementType()->isHalfTy())
14739  return false;
14740 
14741  // Ensure the number of vector elements is greater than 1.
14742  if (VecTy->getNumElements() < 2)
14743  return false;
14744 
14745  // Ensure the element type is legal.
14746  if (ElSize != 8 && ElSize != 16 && ElSize != 32)
14747  return false;
14748 
14749  // Ensure the total vector size is 64 or a multiple of 128. Types larger than
14750  // 128 will be split into multiple interleaved accesses.
14751  return VecSize == 64 || VecSize % 128 == 0;
14752 }
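// Examples: <8 x i8> (64 bits) and <4 x i32> (128 bits) are legal; <8 x i32>
// (256 bits) is accepted and later split into two accesses; <2 x i16>
// (32 bits) is rejected because it is neither 64 bits nor a multiple of 128.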
14753 
14754 /// Lower an interleaved load into a vldN intrinsic.
14755 ///
14756 /// E.g. Lower an interleaved load (Factor = 2):
14757 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
14758 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
14759 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
14760 ///
14761 /// Into:
14762 /// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
14763 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
14764 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
14765 bool ARMTargetLowering::lowerInterleavedLoad(
14766  LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
14767  ArrayRef<unsigned> Indices, unsigned Factor) const {
14768  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
14769  "Invalid interleave factor");
14770  assert(!Shuffles.empty() && "Empty shufflevector input");
14771  assert(Shuffles.size() == Indices.size() &&
14772  "Unmatched number of shufflevectors and indices");
14773 
14774  VectorType *VecTy = Shuffles[0]->getType();
14775  Type *EltTy = VecTy->getVectorElementType();
14776 
14777  const DataLayout &DL = LI->getModule()->getDataLayout();
14778 
14779  // Skip if we do not have NEON and skip illegal vector types. We can
14780  // "legalize" wide vector types into multiple interleaved accesses as long as
14781  // the vector types are divisible by 128.
14782  if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
14783  return false;
14784 
14785  unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
14786 
14787  // A pointer vector can not be the return type of the ldN intrinsics. Need to
14788  // load integer vectors first and then convert to pointer vectors.
14789  if (EltTy->isPointerTy())
14790  VecTy =
14791  VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
14792 
14793  IRBuilder<> Builder(LI);
14794 
14795  // The base address of the load.
14796  Value *BaseAddr = LI->getPointerOperand();
14797 
14798  if (NumLoads > 1) {
14799  // If we're going to generate more than one load, reset the sub-vector type
14800  // to something legal.
14801  VecTy = VectorType::get(VecTy->getVectorElementType(),
14802  VecTy->getVectorNumElements() / NumLoads);
14803 
14804  // We will compute the pointer operand of each load from the original base
14805  // address using GEPs. Cast the base address to a pointer to the scalar
14806  // element type.
14807  BaseAddr = Builder.CreateBitCast(
14808  BaseAddr, VecTy->getVectorElementType()->getPointerTo(
14809  LI->getPointerAddressSpace()));
14810  }
14811 
14812  assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
14813 
14814  Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
14815  Type *Tys[] = {VecTy, Int8Ptr};
14816  static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
14817  Intrinsic::arm_neon_vld3,
14818  Intrinsic::arm_neon_vld4};
14819  Function *VldnFunc =
14820  Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
14821 
14822  // Holds sub-vectors extracted from the load intrinsic return values. The
14823  // sub-vectors are associated with the shufflevector instructions they will
14824  // replace.
14825  DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
14826 
14827  for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
14828  // If we're generating more than one load, compute the base address of
14829  // subsequent loads as an offset from the previous.
14830  if (LoadCount > 0)
14831  BaseAddr = Builder.CreateConstGEP1_32(
14832  BaseAddr, VecTy->getVectorNumElements() * Factor);
14833 
14834  SmallVector<Value *, 2> Ops;
14835  Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
14836  Ops.push_back(Builder.getInt32(LI->getAlignment()));
14837 
14838  CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
14839 
14840  // Replace uses of each shufflevector with the corresponding vector loaded
14841  // by ldN.
14842  for (unsigned i = 0; i < Shuffles.size(); i++) {
14843  ShuffleVectorInst *SV = Shuffles[i];
14844  unsigned Index = Indices[i];
14845 
14846  Value *SubVec = Builder.CreateExtractValue(VldN, Index);
14847 
14848  // Convert the integer vector to pointer vector if the element is pointer.
14849  if (EltTy->isPointerTy())
14850  SubVec = Builder.CreateIntToPtr(
14851  SubVec, VectorType::get(SV->getType()->getVectorElementType(),
14852  VecTy->getVectorNumElements()));
14853 
14854  SubVecs[SV].push_back(SubVec);
14855  }
14856  }
14857 
14858  // Replace uses of the shufflevector instructions with the sub-vectors
14859  // returned by the load intrinsic. If a shufflevector instruction is
14860  // associated with more than one sub-vector, those sub-vectors will be
14861  // concatenated into a single wide vector.
14862  for (ShuffleVectorInst *SVI : Shuffles) {
14863  auto &SubVec = SubVecs[SVI];
14864  auto *WideVec =
14865  SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
14866  SVI->replaceAllUsesWith(WideVec);
14867  }
14868 
14869  return true;
14870 }
14871 
14872 /// Lower an interleaved store into a vstN intrinsic.
14873 ///
14874 /// E.g. Lower an interleaved store (Factor = 3):
14875 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
14876 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
14877 /// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
14878 ///
14879 /// Into:
14880 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
14881 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
14882 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
14883 /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
14884 ///
14885 /// Note that the new shufflevectors will be removed and we'll only generate one
14886 /// vst3 instruction in CodeGen.
14887 ///
14888 /// Example for a more general valid mask (Factor 3). Lower:
14889 /// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
14890 /// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
14891 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
14892 ///
14893 /// Into:
14894 /// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
14895 /// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
14896 /// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
14897 /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
14898 bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
14899  ShuffleVectorInst *SVI,
14900  unsigned Factor) const {
14901  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
14902  "Invalid interleave factor");
14903 
14904  VectorType *VecTy = SVI->getType();
14905  assert(VecTy->getVectorNumElements() % Factor == 0 &&
14906  "Invalid interleaved store");
14907 
14908  unsigned LaneLen = VecTy->getVectorNumElements() / Factor;
14909  Type *EltTy = VecTy->getVectorElementType();
14910  VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
14911 
14912  const DataLayout &DL = SI->getModule()->getDataLayout();
14913 
14914  // Skip if we do not have NEON and skip illegal vector types. We can
14915  // "legalize" wide vector types into multiple interleaved accesses as long as
14916  // the vector types are divisible by 128.
14917  if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
14918  return false;
14919 
14920  unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
14921 
14922  Value *Op0 = SVI->getOperand(0);
14923  Value *Op1 = SVI->getOperand(1);
14924  IRBuilder<> Builder(SI);
14925 
14926  // StN intrinsics don't support pointer vectors as arguments. Convert pointer
14927  // vectors to integer vectors.
14928  if (EltTy->isPointerTy()) {
14929  Type *IntTy = DL.getIntPtrType(EltTy);
14930 
14931  // Convert to the corresponding integer vector.
14932  Type *IntVecTy =
14933  VectorType::get(IntTy, Op0->getType()->getVectorNumElements());
14934  Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
14935  Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
14936 
14937  SubVecTy = VectorType::get(IntTy, LaneLen);
14938  }
14939 
14940  // The base address of the store.
14941  Value *BaseAddr = SI->getPointerOperand();
14942 
14943  if (NumStores > 1) {
14944  // If we're going to generate more than one store, reset the lane length
14945  // and sub-vector type to something legal.
14946  LaneLen /= NumStores;
14947  SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen);
14948 
14949  // We will compute the pointer operand of each store from the original base
14950  // address using GEPs. Cast the base address to a pointer to the scalar
14951  // element type.
14952  BaseAddr = Builder.CreateBitCast(
14953  BaseAddr, SubVecTy->getVectorElementType()->getPointerTo(
14954  SI->getPointerAddressSpace()));
14955  }
14956 
14957  assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
14958 
14959  auto Mask = SVI->getShuffleMask();
14960 
14961  Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
14962  Type *Tys[] = {Int8Ptr, SubVecTy};
14963  static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
14964  Intrinsic::arm_neon_vst3,
14965  Intrinsic::arm_neon_vst4};
14966 
14967  for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
14968  // If we're generating more than one store, we compute the base address of
14969  // subsequent stores as an offset from the previous.
14970  if (StoreCount > 0)
14971  BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor);
14972 
14973  SmallVector<Value *, 6> Ops;
14974  Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
14975 
14976  Function *VstNFunc =
14977  Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
14978 
14979  // Split the shufflevector operands into sub vectors for the new vstN call.
14980  for (unsigned i = 0; i < Factor; i++) {
14981  unsigned IdxI = StoreCount * LaneLen * Factor + i;
14982  if (Mask[IdxI] >= 0) {
14983  Ops.push_back(Builder.CreateShuffleVector(
14984  Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0)));
14985  } else {
14986  unsigned StartMask = 0;
14987  for (unsigned j = 1; j < LaneLen; j++) {
14988  unsigned IdxJ = StoreCount * LaneLen * Factor + j;
14989  if (Mask[IdxJ * Factor + IdxI] >= 0) {
14990  StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
14991  break;
14992  }
14993  }
14994  // Note: If all elements in a chunk are undefs, StartMask=0!
14995  // Note: Filling undef gaps with random elements is ok, since
14996  // those elements were being written anyway (with undefs).
14997  // In the case of all undefs we're defaulting to using elems from 0
14998  // Note: StartMask cannot be negative, it's checked in
14999  // isReInterleaveMask
15000  Ops.push_back(Builder.CreateShuffleVector(
15001  Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
15002  }
15003  }
15004 
15005  Ops.push_back(Builder.getInt32(SI->getAlignment()));
15006  Builder.CreateCall(VstNFunc, Ops);
15007  }
15008  return true;
15009 }
15010 
15011 enum HABaseType {
15012  HA_UNKNOWN = 0,
15013  HA_FLOAT,
15014  HA_DOUBLE,
15015  HA_VECT64,
15016  HA_VECT128
15017 };
15018 
15019 static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
15020  uint64_t &Members) {
15021  if (auto *ST = dyn_cast<StructType>(Ty)) {
15022  for (unsigned i = 0; i < ST->getNumElements(); ++i) {
15023  uint64_t SubMembers = 0;
15024  if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
15025  return false;
15026  Members += SubMembers;
15027  }
15028  } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
15029  uint64_t SubMembers = 0;
15030  if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
15031  return false;
15032  Members += SubMembers * AT->getNumElements();
15033  } else if (Ty->isFloatTy()) {
15034  if (Base != HA_UNKNOWN && Base != HA_FLOAT)
15035  return false;
15036  Members = 1;
15037  Base = HA_FLOAT;
15038  } else if (Ty->isDoubleTy()) {
15039  if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
15040  return false;
15041  Members = 1;
15042  Base = HA_DOUBLE;
15043  } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
15044  Members = 1;
15045  switch (Base) {
15046  case HA_FLOAT:
15047  case HA_DOUBLE:
15048  return false;
15049  case HA_VECT64:
15050  return VT->getBitWidth() == 64;
15051  case HA_VECT128:
15052  return VT->getBitWidth() == 128;
15053  case HA_UNKNOWN:
15054  switch (VT->getBitWidth()) {
15055  case 64:
15056  Base = HA_VECT64;
15057  return true;
15058  case 128:
15059  Base = HA_VECT128;
15060  return true;
15061  default:
15062  return false;
15063  }
15064  }
15065  }
15066 
15067  return (Members > 0 && Members <= 4);
15068 }
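// Examples (sketch): struct { float x, y, z; } is an HA with Base = HA_FLOAT
// and Members = 3; float[2][2] contributes 4 float members; an aggregate
// mixing a float with a double is rejected, as is one with more than four
// members of the base type.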
15069 
15070 /// Return the correct alignment for the current calling convention.
15071 unsigned
15072 ARMTargetLowering::getABIAlignmentForCallingConv(Type *ArgTy,
15073  DataLayout DL) const {
15074  if (!ArgTy->isVectorTy())
15075  return DL.getABITypeAlignment(ArgTy);
15076 
15077  // Avoid over-aligning vector parameters. It would require realigning the
15078  // stack and waste space for no real benefit.
15079  return std::min(DL.getABITypeAlignment(ArgTy), DL.getStackAlignment());
15080 }
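// E.g. (sketch) on an AAPCS target with 8-byte stack alignment, a <4 x i32>
// argument whose natural ABI alignment is 16 bytes is capped at 8 here,
// avoiding a forced realignment of the caller's stack.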
15081 
15082 /// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
15083 /// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
15084 /// passing according to AAPCS rules.
15085 bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
15086  Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
15087  if (getEffectiveCallingConv(CallConv, isVarArg) !=
15088  CallingConv::ARM_AAPCS_VFP)
15089  return false;
15090 
15091  HABaseType Base = HA_UNKNOWN;
15092  uint64_t Members = 0;
15093  bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
15094  LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
15095 
15096  bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
15097  return IsHA || IsIntArray;
15098 }
15099 
15100 unsigned ARMTargetLowering::getExceptionPointerRegister(
15101  const Constant *PersonalityFn) const {
15102  // Platforms which do not use SjLj EH may return values in these registers
15103  // via the personality function.
15104  return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0;
15105 }
15106 
15107 unsigned ARMTargetLowering::getExceptionSelectorRegister(
15108  const Constant *PersonalityFn) const {
15109  // Platforms which do not use SjLj EH may return values in these registers
15110  // via the personality function.
15111  return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
15112 }
15113 
15114 void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
15115  // Update IsSplitCSR in ARMFunctionInfo.
15116  ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
15117  AFI->setIsSplitCSR(true);
15118 }
15119 
15120 void ARMTargetLowering::insertCopiesSplitCSR(
15121  MachineBasicBlock *Entry,
15122  const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
15123  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
15124  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
15125  if (!IStart)
15126  return;
15127 
15128  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
15129  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
15130  MachineBasicBlock::iterator MBBI = Entry->begin();
15131  for (const MCPhysReg *I = IStart; *I; ++I) {
15132  const TargetRegisterClass *RC = nullptr;
15133  if (ARM::GPRRegClass.contains(*I))
15134  RC = &ARM::GPRRegClass;
15135  else if (ARM::DPRRegClass.contains(*I))
15136  RC = &ARM::DPRRegClass;
15137  else
15138  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
15139 
15140  unsigned NewVR = MRI->createVirtualRegister(RC);
15141  // Create copy from CSR to a virtual register.
15142  // FIXME: this currently does not emit CFI pseudo-instructions, it works
15143  // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
15144  // nounwind. If we want to generalize this later, we may need to emit
15145  // CFI pseudo-instructions.
15146  assert(Entry->getParent()->getFunction().hasFnAttribute(
15147  Attribute::NoUnwind) &&
15148  "Function should be nounwind in insertCopiesSplitCSR!");
15149  Entry->addLiveIn(*I);
15150  BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
15151  .addReg(*I);
15152 
15153  // Insert the copy-back instructions right before the terminator.
15154  for (auto *Exit : Exits)
15155  BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
15156  TII->get(TargetOpcode::COPY), *I)
15157  .addReg(NewVR);
15158  }
15159 }
15160 
15161 void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const {
15162  MF.getFrameInfo().computeMaxCallFrameSize(MF);
15163  TargetLoweringBase::finalizeLowering(MF);
15164 }
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG)
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG)
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG)
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs...
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool hasV5TEOps() const
Definition: ARMSubtarget.h:535
bool isMachineConstantPoolEntry() const
Type * getVectorElementType() const
Definition: Type.h:371
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
void setFrameAddressIsTaken(bool T)
uint64_t CallInst * C
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:571
X = FP_ROUND(Y, TRUNC) - Rounding &#39;Y&#39; from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:538
Value * getValueOperand()
Definition: Instructions.h:410
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition: ARMBaseInfo.h:266
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG)
BC is a bitcast that is about to be turned into a VMOVDRR.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set, or Regs.size() if they are all allocated.
static MVT getIntegerVT(unsigned BitWidth)
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
const MachineInstrBuilder & add(const MachineOperand &MO) const
static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, SelectionDAG &DAG)
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
static bool isVZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:594
EVT getValueType() const
Return the ValueType of the referenced return value.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isTargetGNUAEABI() const
Definition: ARMSubtarget.h:672
Value * CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1516
static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV)
Indicate to the backend that GV has had its storage changed to inside a constant pool.
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific dag combine xforms for ISD::BUILD_VECTOR.
bool hasCallSiteLandingPad(MCSymbol *Sym)
Return true if the landing pad Eh symbol has an associated call site.
static bool isConstant(const MachineInstr &MI)
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
bool isUndef() const
ARM_AAPCS - ARM Architecture Procedure Calling Standard calling convention (aka EABI).
Definition: CallingConv.h:100
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
static MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:35
static const APInt * isPowerOf2Constant(SDValue V)
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
const GlobalValue * getGlobal() const
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.
Definition: ISDOpcodes.h:184
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1563
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN)
FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
#define R4(n)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:562
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
LLVMContext & Context
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG)
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
MachineBasicBlock * getMBB() const
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:561
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it&#39;s not CSE&#39;d)...
Definition: SelectionDAG.h:836
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain)
Atomic ordering constants.
int getFunctionContextIndex() const
Return the index for the function context object.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector...
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG)
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the &#39;hasPostISelHook&#39; flag...
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an vector value) starting with the ...
Definition: ISDOpcodes.h:358
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:650
bool isThumb() const
Definition: ARMSubtarget.h:712
This class represents lattice values for constants.
Definition: AllocatorList.h:24
static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMULCombine Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the special multi...
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
bool hasDivideInThumbMode() const
Definition: ARMSubtarget.h:584
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, SelectionDAG &DAG)
std::pair< unsigned, const TargetRegisterClass * > RCPair
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
const unsigned char * bytes_end() const
Definition: StringRef.h:113
int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static bool IsVUZPShuffleNode(SDNode *N)
TOF
Target Operand Flag enum.
Definition: ARMBaseInfo.h:233
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0...
Definition: ISDOpcodes.h:605
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
StringRef getPrivateGlobalPrefix() const
Definition: DataLayout.h:294
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:367
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:260
static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:42
LLVM_NODISCARD bool equals_lower(StringRef RHS) const
equals_lower - Check for string equality, ignoring case.
Definition: StringRef.h:176
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
iterator begin() const
Definition: ArrayRef.h:137
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDECombine - Target-specific dag combine transform from ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL.
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
Instruction * emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool isOSBinFormatELF() const
Tests whether the OS uses the ELF binary format.
Definition: Triple.h:604
an instruction that atomically checks whether a specified value is in a memory location, and, if it is, stores a new value there.
Definition: Instructions.h:529
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
bool isLittle() const
Definition: ARMSubtarget.h:751
const SDValue & getBasePtr() const
static SDValue PerformInsertEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformInsertEltCombine - Target-specific dag combine xforms for ISD::INSERT_VECTOR_ELT.
EABI EABIVersion
EABIVersion - This flag specifies the EABI version.
static bool isVTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG)
SkipLoadExtensionForVMULL - return a load of the original vector size that does not do any sign/zero ...
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
ARMConstantPoolValue - ARM specific constantpool value.
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, bool isSigned)
isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each element has been zero/sign-...
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:164
unsigned getReg() const
getReg - Returns the register number.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:138
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
Y = RRC X, rotate right via carry.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
static bool hasNormalLoadOperand(SDNode *N)
hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node are normal, non-volatile loads.
const SDValue & getValue() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Returns true if the addresing mode representing by AM is legal for the Thumb1 target, for a load/store of the specified type.
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain...
Definition: ISDOpcodes.h:699
SDVTList getVTList() const
unsigned getABIAlignmentForCallingConv(Type *ArgTy, DataLayout DL) const override
Return the correct alignment for the current calling convention.
This class represents a function call, abstracting a target machine&#39;s calling convention.
unsigned Reg
EK_Inline - Jump table entries are emitted inline at their point of use.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:648
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit...
Global Offset Table, Thread Pointer Offset.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
static unsigned SelectPairHalf(unsigned Elements, ArrayRef< int > Mask, unsigned Index)
static MVT getFloatingPointVT(unsigned BitWidth)
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:251
bool hasV7Ops() const
Definition: ARMSubtarget.h:540
const std::string & getAsmString() const
Definition: InlineAsm.h:81
const SDValue & getChain() const
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
static cl::opt< bool > ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true))
Function Alias Analysis Results
This instruction constructs a fixed permutation of two input vectors.
bool hasVFP3() const
Definition: ARMSubtarget.h:568
bool isTargetCOFF() const
Definition: ARMSubtarget.h:656
unsigned getValNo() const
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const
unsigned getAlignment() const
int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG)
SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending load, or BUILD_VECTOR with extended elements, return the unextended value.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:321
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl)
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
auto count_if(R &&Range, UnaryPredicate P) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1260
bool sgt(const APInt &RHS) const
Signed greather than comparison.
Definition: APInt.h:1274
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
Definition: ARMBaseInfo.h:257
bool useNaClTrap() const
Definition: ARMSubtarget.h:629
unsigned second
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:262
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:811
static uint32_t Concat[]
virtual const TargetRegisterClass * getRegClassFor(MVT VT) const
Return the register class that should be used for the specified value type.
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
bool hasV6Ops() const
Definition: ARMSubtarget.h:536
void setIsDead(bool Val=true)
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:141
F(f)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
An instruction for reading from memory.
Definition: Instructions.h:168
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1...
bool isThumb1Only() const
Definition: ARMSubtarget.h:713
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL) const
Soften the operands of a comparison.
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:437
bool isTargetMuslAEABI() const
Definition: ARMSubtarget.h:677
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:384
const SDNodeFlags getFlags() const
an instruction that atomically reads a memory location, combines it with another value, and then stores the result back.
Definition: Instructions.h:692
SDNode * getNode() const
get the SDNode which holds the desired result
#define R2(n)
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1437
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags=0)
unsigned createNEONModImm(unsigned OpCmode, unsigned Val)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
bool hasAcquireRelease() const
Definition: ARMSubtarget.h:589
bool alignLoopsWithOptSize() const override
Should loops be aligned even when the function is marked OptSize (but not MinSize).
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
#define op(i)
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
RESULT, BOOL = [SU]SUBO(LHS, RHS) - Same as [SU]ADDO, but for subtraction.
Definition: ISDOpcodes.h:254
Global Offset Table, PC Relative.
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
static IntegerType * getInt16Ty(LLVMContext &C)
Definition: Type.cpp:175
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
Thread Pointer Offset.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, NEON load/store intrinsics...
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the most significant bit to the least, stopping at the first 1...
Definition: MathExtras.h:189
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst *> Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vldN intrinsic.
bool isTargetHardFloat() const
uint64_t High
uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits)
decodeNEONModImm - Decode a NEON modified immediate value into the element value and the element size...
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:405
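These MathExtras helpers are the workhorses for classifying immediates and alignments. A minimal sketch, assuming only the header above (not code from this file):

  #include "llvm/Support/MathExtras.h"
  #include <cstdint>

  static void demoMathExtras() {
    bool Mask = llvm::isMask_32(0x00FFu);           // true: ones from bit 0 up
    bool Pow2 = llvm::isPowerOf2_32(64u);           // true
    unsigned LZ = llvm::countLeadingZeros(0x00FFu); // 24 leading zeros in 32 bits
    uint64_t Al = llvm::MinAlign(16, 24);           // 8: both are multiples of 8
    (void)Mask; (void)Pow2; (void)LZ; (void)Al;
  }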
static bool isThumb(const MCSubtargetInfo &STI)
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:781
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:435
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:316
static bool allUsersAreInFunction(const Value *V, const Function *F)
Return true if all users of V are within function F, looking through ConstantExprs.
bool isFPOnlySP() const
Definition: ARMSubtarget.h:599
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1509
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
SDValue getExternalSymbol(const char *Sym, EVT VT)
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:130
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:159
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool isMemLoc() const
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vstN intrinsic.
MachineJumpTableInfo * getOrCreateJumpTableInfo(unsigned JTEntryKind)
getOrCreateJumpTableInfo - Get the JumpTableInfo for this function, if it does already exist...
bool genExecuteOnly() const
Definition: ARMSubtarget.h:633
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode *> &Visited, SmallVectorImpl< const SDNode *> &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
bool needsCustom() const
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG)
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:491
bool hasV8MBaselineOps() const
Definition: ARMSubtarget.h:547
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic...
Definition: ISDOpcodes.h:114
bool hasStructRetAttr() const
Determine if the function returns a structure through first or second pointer argument.
Definition: Function.h:579
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:210
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1135
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:136
bool isTargetELF() const
Definition: ARMSubtarget.h:657
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
Definition: ISDOpcodes.h:456
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1632
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
The address of a basic block.
Definition: Constants.h:840
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "...
Definition: ARMBaseInfo.h:254
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
A description of a memory reference used in the backend.
MO_SBREL - On a symbol operand, this represents a static base relative relocation.
Definition: ARMBaseInfo.h:261
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:371
static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:211
const HexagonInstrInfo * TII
bool hasARMOps() const
Definition: ARMSubtarget.h:565
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1) intrinsic...
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
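ArrayRef and makeArrayRef give the shuffle-mask helpers a cheap, non-owning view over caller storage. A minimal sketch with illustrative names, not code from this file:

  #include "llvm/ADT/ArrayRef.h"

  static int sumMask(llvm::ArrayRef<int> M) {
    int S = 0;
    for (int E : M)  // iterates like a container but owns nothing
      S += E;
    return S;
  }

  static int demoArrayRef() {
    static const int Mask[] = {3, 2, 1, 0};
    return sumMask(llvm::makeArrayRef(Mask)); // view over the C array, no copy
  }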
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
Shift and rotation operations.
Definition: ISDOpcodes.h:410
static bool isVUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:412
static bool isUpperSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:478
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef...
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth...
Definition: ISDOpcodes.h:393
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
static SDValue findMUL_LOHI(SDValue V)
static bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
static bool isVZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of "vector_shuffle v...
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:652
Value * emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
unsigned getFrameRegister(const MachineFunction &MF) const override
CallLoweringInfo & setChain(SDValue InChain)
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:191
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG)
PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for ISD::VECTOR_SHUFFLE.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:170
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
unsigned getScalarValueSizeInBits() const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
FLT_ROUNDS_ - Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest 2 Round to ...
Definition: ISDOpcodes.h:546
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const override
Returns the target specific optimal type for load and store operations as a result of memset...
This file contains the simple types necessary to represent the attributes associated with functions a...
SimpleValueType SimpleTy
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getZeroVector - Returns a vector of specified type with all zero elements.
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:304
The memory access is dereferenceable (i.e., doesn't trap).
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt) For double-word atomic operations: ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amtLo, amtHi) ValLo, ValHi, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amtLo, amtHi) These correspond to the atomicrmw instruction.
Definition: ISDOpcodes.h:810
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:460
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
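Calls like this one drive SelectionDAG legalization: for each (opcode, type) pair the target records Legal, Promote, Expand, or Custom. The fragment below is a hypothetical sketch of the usual registration pattern in a target's TargetLowering constructor; MyTarget and its GPRRegClass are illustrative names, not part of this file:

  // Inside a hypothetical MyTargetLowering constructor:
  addRegisterClass(MVT::i32, &MyTarget::GPRRegClass); // hypothetical reg class
  computeRegisterProperties(Subtarget->getRegisterInfo());

  setOperationAction(ISD::SDIV,  MVT::i32, Expand);   // no native divide
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);   // handled in LowerOperation
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);    // no truncating FP store
  setTargetDAGCombine(ISD::ADD);                      // request PerformDAGCombine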
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:401
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG)
isZeroExtended - Check if a node is a vector value that is zero-extended or a constant BUILD_VECTOR w...
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG...
Definition: ISDOpcodes.h:73
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE R Default(T Value)
Definition: StringSwitch.h:203
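StringSwitch keeps string dispatch (constraint letters, register names) readable. A minimal sketch with illustrative constraint letters, not code from this file:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/StringSwitch.h"

  static int classifyConstraint(llvm::StringRef C) {
    return llvm::StringSwitch<int>(C)
        .Case("r", 0)  // general-purpose register class
        .Case("w", 1)  // FP/vector register class
        .Default(-1);  // anything else is unsupported
  }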
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
SmallVectorImpl< unsigned > & getCallSiteLandingPad(MCSymbol *Sym)
Get the call site indexes for a landing pad EH symbol.
bool hasPerfMon() const
Definition: ARMSubtarget.h:600
LocInfo getLocInfo() const
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:978
#define im(i)
KnownBits zext(unsigned BitWidth)
Zero extends the underlying known Zero and One bits.
Definition: KnownBits.h:119
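KnownBits carries per-bit zero/one facts through hooks such as computeKnownBitsForTargetNode. A minimal sketch of the API, not code from this file:

  #include "llvm/Support/KnownBits.h"

  static bool demoKnownBits() {
    llvm::KnownBits Known(8);  // 8-bit value, nothing known yet
    Known.Zero.setBitsFrom(4); // prove bits [7:4] are zero
    llvm::KnownBits Wide = Known.zext(16); // widen; top bits become known zero
    return Wide.getBitWidth() == 16;
  }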
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
static StructType * get(LLVMContext &Context, ArrayRef< Type *> Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:342
This file implements a class to represent arbitrary precision integral constant values and operations...
void SplitString(StringRef Source, SmallVectorImpl< StringRef > &OutFragments, StringRef Delimiters=" \t\n\v\f\r")
SplitString - Split up the specified string according to the specified delimiters, appending the result fragments to the output list.
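A minimal sketch of SplitString with its default whitespace delimiters (illustrative input, not from this file):

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/StringExtras.h"
  #include "llvm/ADT/StringRef.h"

  static size_t demoSplitString() {
    llvm::SmallVector<llvm::StringRef, 4> Parts;
    llvm::SplitString("neon vfp3 thumb2", Parts); // splits on whitespace
    return Parts.size();                          // 3 fragments
  }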
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
unsigned getArgRegsSaveSize() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
SmallVector< ISD::InputArg, 32 > Ins
AtomicOrdering
Atomic ordering for LLVM&#39;s memory model.
static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:695
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy, Idx).
bool hasVFP2() const
Definition: ARMSubtarget.h:567
unsigned getSizeInBits() const
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
static const unsigned PerfectShuffleTable[6561+1]
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1727
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG)
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1533
int64_t getSExtValue() const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:419
Fast - This calling convention attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:43
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:298
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:67
Value * emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type...
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1732
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1575
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:406
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself...
Constant * createSequentialMask(IRBuilder<> &Builder, unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:398
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:478
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
bool hasDSP() const
Definition: ARMSubtarget.h:628
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here...
Definition: ISDOpcodes.h:118
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
bool useMovt(const MachineFunction &MF) const
SDValue getRegisterMask(const uint32_t *RegMask)
static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:402
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:121
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG)
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:429
static bool isSRL16(const SDValue &Op)
This contains information for each constraint that we are lowering.
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
static bool isStore(int Opcode)
SmallVector< ISD::OutputArg, 32 > Outs
bool useNEONForSinglePrecisionFP() const
Definition: ARMSubtarget.h:580
bool hasFPAO() const
Definition: ARMSubtarget.h:604
bool hasV6T2Ops() const
Definition: ARMSubtarget.h:539
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
CallLoweringInfo & setZExtResult(bool Value=true)
bool isTargetDarwin() const
Definition: ARMSubtarget.h:647
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:852
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
An instruction for storing to memory.
Definition: Instructions.h:321
static mvt_range integer_vector_valuetypes()
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:959
op_iterator op_begin() const
unsigned getStackAlignment() const
Definition: DataLayout.h:257
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1659
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:576
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:747
static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:25
bool hasFP16() const
Definition: ARMSubtarget.h:635
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:221
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:66
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
bool isStrongerThanMonotonic(AtomicOrdering ao)
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG)
static const MCPhysReg GPRArgRegs[]
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1742
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
static SDValue PerformAddeSubeCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1020
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
MVT getVectorElementType() const
Value * getOperand(unsigned i) const
Definition: User.h:170
Analysis containing CSE Info
Definition: CSEInfo.cpp:21
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
static SDValue FindBFIToCombineWith(SDNode *N)
Class to represent pointers.
Definition: DerivedTypes.h:467
unsigned getByValSize() const
UNDEF - An undefined node.
Definition: ISDOpcodes.h:178
ARM_AAPCS_VFP - Same as ARM_AAPCS, but uses hard floating point ABI.
Definition: CallingConv.h:103
This class is used to represent ISD::STORE nodes.
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:524
bool isReadOnly(const GlobalValue *GV) const
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific dag combine xforms for ARMISD::VMOVDRR.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:118
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:327
unsigned getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1182
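The IRBuilder calls listed in this index (CreateZExt, CreateBitCast, CreateOr, ...) are how the interleaved-access and atomic hooks emit IR. A minimal, self-contained sketch; the function and its names are illustrative, not from this file:

  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Module.h"

  static llvm::Function *buildDemo(llvm::Module &M) {
    llvm::LLVMContext &Ctx = M.getContext();
    auto *FnTy = llvm::FunctionType::get(llvm::Type::getInt32Ty(Ctx),
                                         {llvm::Type::getInt8Ty(Ctx)},
                                         /*isVarArg=*/false);
    auto *Fn = llvm::Function::Create(FnTy, llvm::Function::ExternalLinkage,
                                      "demo", &M);
    llvm::IRBuilder<> Builder(llvm::BasicBlock::Create(Ctx, "entry", Fn));
    llvm::Value *Wide = Builder.CreateZExt(Fn->arg_begin(),
                                           Builder.getInt32Ty()); // i8 -> i32
    Builder.CreateRet(Builder.CreateOr(Wide, Builder.getInt32(1))); // set bit 0
    return Fn;
  }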
TargetInstrInfo - Interface to description of machine instruction set.
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool hasDivideInARMMode() const
Definition: ARMSubtarget.h:585
AddrOpc getAM2Op(unsigned AM2Opc)
bool isTargetWatchABI() const
Definition: ARMSubtarget.h:650
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:636
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:229
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits...
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with given size.
static EVT getExtensionTo64Bits(const EVT &OrigVT)
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:567
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:147
The memory access is volatile.
void setReturnRegsCount(unsigned s)
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:610
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:750
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:577
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1166
const SDValue & getBasePtr() const
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:169
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:166
static SDValue PerformBFICombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
bool isAllOnesValue() const
Determine if all bits are set.
Definition: APInt.h:396
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:85
bool preferISHSTBarriers() const
Definition: ARMSubtarget.h:609
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
bool useSoftFloat() const override
const MachineRegisterInfo * MRI
static bool isSignExtended(SDNode *N, SelectionDAG &DAG)
isSignExtended - Check if a node is a vector value that is sign-extended or a constant BUILD_VECTOR w...
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the least significant bit to the most, stopping at the first 1...
Definition: MathExtras.h:120
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned countPopulation() const
Count the number of bits set.
Definition: APInt.h:1658
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:429
An array constant whose element type is a simple 1/2/4/8-byte integer or float/double, and whose elements are just simple data values (i.e. ConstantInt/ConstantFP).
Definition: Constants.h:690
static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) can replace combinations of ...
Machine Value Type.
Value * getCalledValue() const
Definition: InstrTypes.h:1174
Value * concatenateVectors(IRBuilder<> &Builder, ArrayRef< Value *> Vecs)
Concatenate a list of vectors.
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
bool isOptionalDef() const
Set if this operand is an optional def.
Definition: MCInstrDesc.h:96
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
static SDValue PerformVDUPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, unsigned &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
Simple binary floating point operators.
Definition: ISDOpcodes.h:283
bool isMClass() const
Definition: ARMSubtarget.h:716
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting an 8-bit im...
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:273
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1185
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:66
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE...
Definition: ISDOpcodes.h:728
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
const SDValue & getOperand(unsigned Num) const
static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG)
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:332
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool isSHL16(const SDValue &Op)
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:224
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
static cl::opt< unsigned > ConstpoolPromotionMaxSize("arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), cl::init(64))
static ManagedStatic< OptionRegistry > OR
Definition: Options.cpp:31
bool isTargetWatchOS() const
Definition: ARMSubtarget.h:649
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
ARM_APCS - ARM Procedure Calling Standard calling convention (obsolete, but still used on some target...
Definition: CallingConv.h:96
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
Definition: SelectionDAG.h:824
static bool isReverseMask(ArrayRef< int > M, EVT VT)
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
bool isAcquireOrStronger(AtomicOrdering ao)
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
bool hasFPARMv8() const
Definition: ARMSubtarget.h:570
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:588
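The static APInt mask constructors pair naturally with the BFI-style combines indexed here. A minimal sketch, not code from this file:

  #include "llvm/ADT/APInt.h"

  static bool demoAPIntMasks() {
    llvm::APInt Hi = llvm::APInt::getHighBitsSet(32, 8); // 0xFF000000
    llvm::APInt Bit = llvm::APInt::getOneBitSet(32, 5);  // 0x00000020
    return Hi.countPopulation() == 8 && (Hi & Bit) == 0; // masks don't overlap
  }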
CombineLevel
Definition: DAGCombine.h:16
static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
static mvt_range fp_valuetypes()
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:740
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC)
IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B)
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:434
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:161
This class provides iterator support for SDUse operands that use a specific SDNode.
static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
bool useMulOps() const
Definition: ARMSubtarget.h:595
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, SelectionDAG &DAG)
static bool isSRA16(const SDValue &Op)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:144
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
bool CombineTo(SDValue O, SDValue N)
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using &#39;From&#39; to use &#39;To&#39; instead.
static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, SDValue &SatK)
static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
void computeMaxCallFrameSize(const MachineFunction &MF)
Computes the maximum size of a call frame and the AdjustsStack property.
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:767
const APInt & getAPIntValue() const
bool definesRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr fully defines the specified register.
const Triple & getTargetTriple() const
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2)
Value * getPointerOperand()
Definition: Instructions.h:285
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:770
static mvt_range vector_valuetypes()
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override
getScalingFactorCost - Return the cost of the scaling used in addressing mode represented by AM...
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic...
arg_iterator arg_begin()
Definition: Function.h:671
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
static cl::opt< bool > EnableConstpoolPromotion("arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), cl::init(false))
self_iterator getIterator()
Definition: ilist_node.h:82
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
The memory access is non-temporal.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
Class to represent integer types.
Definition: DerivedTypes.h:40
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y)...
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:281
bool allowsUnalignedMem() const
Definition: ARMSubtarget.h:745
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:719
static void ReplaceCMP_SWAP_64Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
static SDValue PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
const ARMSubtarget * getSubtarget() const
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
unsigned MaxStoresPerMemmove
Specify maximum bytes of store instructions per memmove call.
bool isOSBinFormatMachO() const
Tests whether the environment is MachO.
Definition: Triple.h:614
const MachineInstrBuilder & addFrameIndex(int Idx) const
unsigned getInRegsParamsProcessed() const
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:416
static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformVDUPLANECombine - Target-specific dag combine xforms for ARMISD::VDUPLANE. ...
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo...
Definition: ISDOpcodes.h:796
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:193
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:556
bool isCheapToSpeculateCttz() const override
Return true if it is cheap to speculate a call to intrinsic cttz.
SmallPtrSet< const GlobalVariable *, 2 > & getGlobalsPromotedToConstantPool()
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
bool isAPCS_ABI() const
std::vector< ArgListEntry > ArgListTy
Extended Value Type.
Definition: ValueTypes.h:34
static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformORCombine - Target-specific dag combine xforms for ISD::OR.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:398
static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool useSjLjEH() const
Definition: ARMSubtarget.h:630
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2083
static SDValue PerformORCombineToSMULWBT(SDNode *OR, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
bool isPositionIndependent() const
This structure contains all information that is necessary for lowering calls.
size_t size() const
Definition: SmallVector.h:53
static bool isLTorLE(ISD::CondCode CC)
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:566
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1207
PointerType * getInt8PtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer to an 8-bit integer value.
Definition: IRBuilder.h:385
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:220
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
bool isVolatile() const
const TargetMachine & getTargetMachine() const
This class contains a discriminated union of information about pointers in memory operands...
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl, SelectionDAG &DAG)
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC)
Set the CallingConv that should be used for the specified libcall.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1655
const std::string & getConstraintString() const
Definition: InlineAsm.h:82
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:971
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
unsigned first
void dump() const
Definition: AsmWriter.cpp:4302
void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC)
Override the default CondCode to be used to test the result of the comparison libcall against zero...
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
The memory access writes data.
static const int BlockSize
Definition: TarWriter.cpp:34
bool isReleaseOrStronger(AtomicOrdering ao)
bool use_empty() const
Return true if there are no uses of this node.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type...
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
size_type size() const
Definition: SmallPtrSet.h:93
static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformShiftCombine - Checks for immediate versions of vector shifts and lowers them.
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specific constraint if it is set.
Definition: MCInstrDesc.h:188
SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:639
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:633
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
void dump() const
Dump this node, for debugging.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:404
Iterator for intrusive lists based on ilist_node.
void setPromotedConstpoolIncrease(int Sz)
CCState - This class holds information needed while lowering arguments and return values...
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into a shifter_operand immed...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG)
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
BlockVerifier::State From
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
bool useSoftFloat() const
Definition: ARMSubtarget.h:711
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:534
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:339
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:265
bool isTargetAEABI() const
Definition: ARMSubtarget.h:667
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:451
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:213
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:222
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
MachineOperand class - Representation of each machine instruction operand.
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
Module.h This file contains the declarations for the Module class.
static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total value size to 64 bits...
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
bool isFPBrccSlow() const
Definition: ARMSubtarget.h:598
const InstrItineraryData * getInstrItineraryData() const override
getInstrItins - Return the instruction itineraries based on subtarget selection.
Definition: ARMSubtarget.h:770
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:734
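A minimal sketch (assuming DAG, an SDLoc dl, and four i32 SDValues A..D in scope; names are illustrative):
  SDValue Ops[] = {A, B, C, D};
  SDValue Vec = DAG.getBuildVector(MVT::v4i32, dl, Ops); // ISD::BUILD_VECTOR of the four scalars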
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
Provides information about what library functions are available for the current target.
void dump() const
CCValAssign - Represent assignment of one arg/retval to a location.
bool isTargetAndroid() const
Definition: ARMSubtarget.h:698
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:1044
static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
iterator end() const
Definition: ArrayRef.h:138
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:96
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:730
bool hasMPExtension() const
Definition: ARMSubtarget.h:627
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:644
const DataFlowGraph & G
Definition: RDFGraph.cpp:211
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned char TargetFlags=0) const
bool hasNEON() const
Definition: ARMSubtarget.h:571
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:413
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:581
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:307
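A minimal sketch (assuming an IRBuilder<> named Builder; the name is illustrative):
  ConstantInt *FortyTwo = Builder.getInt32(42); // i32 constant 42 in the current context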
This is an abstract virtual class for memory operations.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
const Constant * getConstVal() const
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2068
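A minimal sketch using the overload that takes an ArrayRef<uint32_t> mask rather than a mask Value (assuming Builder and two <4 x i32> values A and B; names are illustrative):
  Value *Rev = Builder.CreateShuffleVector(A, B, {3, 2, 1, 0}); // reverse A; indices 4..7 would select from B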
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
CallLoweringInfo & setSExtResult(bool Value=true)
unsigned getAM2Offset(unsigned AM2Opc)
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:622
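A minimal sketch of the splat behavior (assuming an LLVMContext Ctx; names are illustrative):
  Type *V4I32 = VectorType::get(Type::getInt32Ty(Ctx), 4);
  Constant *Splat = ConstantInt::get(V4I32, 7); // <7, 7, 7, 7>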
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively...
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:226
Represents one node in the SelectionDAG.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps)
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
int64_t getImm() const
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
BBTy * getParent() const
Get the basic block containing the call site.
Definition: CallSite.h:97
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:679
const Function & getFunction() const
Return the LLVM function that this machine code represents.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
unsigned getPrefLoopAlignment() const
Definition: ARMSubtarget.h:818
unsigned logBase2() const
Definition: APInt.h:1748
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
static mvt_range integer_valuetypes()
The access may modify the value stored in memory.
static bool isS16(const SDValue &Op, SelectionDAG &DAG)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:539
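Illustrative values for the floor behavior:
  unsigned A = Log2_32(32); // 5
  unsigned B = Log2_32(33); // also 5 (floor); Log2_32(0) is documented to return -1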
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:164
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
size_t use_size() const
Return the number of uses of this node.
unsigned getPreferredAlignment(const GlobalVariable *GV) const
Returns the preferred alignment of the specified global.
Definition: DataLayout.cpp:818
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
bool ExpandInlineAsm(CallInst *CI) const override
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to...
Class to represent vector types.
Definition: DerivedTypes.h:393
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:56
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT...
Definition: ValueTypes.h:73
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
EVT getMemoryVT() const
Return the type of the in-memory value.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Class for arbitrary precision integers.
Definition: APInt.h:70
unsigned getByValAlign() const
CodeModel::Model getCodeModel() const
Returns the code model.
Instruction * makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const
iterator_range< use_iterator > uses()
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:420
void setMinFunctionAlignment(unsigned Align)
Set the target's minimum function alignment (in log2(bytes))
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:241
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:464
static use_iterator use_end()
typename SuperClass::iterator iterator
Definition: SmallVector.h:327
iterator_range< user_iterator > users()
Definition: Value.h:400
void setPrefLoopAlignment(unsigned Align)
Set the target's preferred loop alignment.
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:468
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
static SDValue PerformAddcSubcCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1103
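A minimal sketch (assuming Builder and an i32 Value *X; names are illustrative):
  Value *Shifted = Builder.CreateShl(X, Builder.getInt32(3)); // emits the IR instruction X << 3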
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:471
bool isTailCall() const
bool genLongCalls() const
Definition: ARMSubtarget.h:632
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:478
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:312
int getMaskElt(unsigned Idx) const
bool isROPI() const
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII)
MatchingStackOffset - Return true if the given stack call argument is already available in the same p...
void setArgumentStackSize(unsigned size)
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
bool hasVMLxForwarding() const
Definition: ARMSubtarget.h:597
static bool isVTBLMask(ArrayRef< int > M, EVT VT)
bool isEmpty() const
Returns true if there are no itineraries.
bool isCheapToSpeculateCtlz() const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) can replace combinations of ...
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:63
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:568
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
CallLoweringInfo & setTailCall(bool Value=true)
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
const TargetRegisterClass * getRegClassFor(MVT VT) const override
getRegClassFor - Return the register class that should be used for the specified value type...
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1760
Section Relative (Windows TLS)
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:436
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:638
static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, SelectionDAG &DAG)
bool isThumb2() const
Definition: ARMSubtarget.h:714
Representation of each machine instruction.
Definition: MachineInstr.h:64
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the source.
Definition: ISDOpcodes.h:724
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool killsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr kills the specified register.
const Triple & getTargetTriple() const
Definition: ARMSubtarget.h:645
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:49
bool shouldFoldShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
bool hasRetAddrStack() const
Definition: ARMSubtarget.h:625
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
void setVarArgsFrameIndex(int Index)
SmallVector< SDValue, 32 > OutVals
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:177
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
static bool LowerToByteSwap(CallInst *CI)
Try to replace a call instruction with a call to a bswap intrinsic.
unsigned getNumArgOperands() const
Definition: InstrTypes.h:1133
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
bool isTargetIOS() const
Definition: ARMSubtarget.h:648
const uint32_t * getSjLjDispatchPreservedMask(const MachineFunction &MF) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const unsigned char * bytes_begin() const
Definition: StringRef.h:110
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:387
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power of 2, return the log base 2 integer value.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:705
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:241
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, bool isSigned, const SDLoc &dl, bool doesNotReturn=false, bool isReturnValueUsed=true) const
Returns a pair of (return value, chain).
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:537
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
unsigned getLocMemOffset() const
ObjectFormatType getObjectFormat() const
getFormat - Get the object format for this triple.
Definition: Triple.h:320
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v...
bool isEHPad() const
Returns true if the block is a landing pad.
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:206
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:182
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:195
virtual unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const
If the specified machine instruction is a direct load from a stack slot, return the virtual or physic...
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node)
Attaches vregs to MEMCPY that it will use as scratch registers when it is expanded into LDM/STM...
static SDValue LowerInterruptReturn(SmallVectorImpl< SDValue > &RetOps, const SDLoc &DL, SelectionDAG &DAG)
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value...
Definition: APInt.h:482
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:486
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable...
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:614
PointerUnion< const Value *, const PseudoSourceValue * > ptrVal
TargetOptions Options
Definition: TargetMachine.h:97
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1181
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
void setReg(unsigned Reg)
Change the register this operand corresponds to.
void setArgRegsSaveSize(unsigned s)
static MachineOperand CreateImm(int64_t Val)
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:403
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment store operation with given size.
Flags getFlags() const
Return the raw flags of the source value.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1213
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:503
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:595
The memory access always returns the same value (or traps).
unsigned MaxStoresPerMemmoveOptSize
Maximum number of store instructions that may be substituted for a call to memmove, used for functions with OptSize attribute.
unsigned MaxStoresPerMemcpyOptSize
Maximum number of store operations that may be substituted for a call to memcpy, used for functions w...
static cl::opt< unsigned > ConstpoolPromotionMaxTotal("arm-promote-constant-max-total", cl::Hidden, cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128))
void setStackPointerRegisterToSaveRestore(unsigned R)
If set to a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save and restore.
unsigned InferPtrAlignment(SDValue Ptr) const
Infer alignment of a load / store address.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
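A minimal sketch of the usual pattern (assuming an SDValue N; the name is illustrative):
  if (auto *C = dyn_cast<ConstantSDNode>(N)) { // yields null, rather than asserting, when N is something else
    uint64_t Imm = C->getZExtValue();
    // ... use Imm ...
  }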
uint32_t Size
Definition: Profile.cpp:47
Rename collisions when linking (static functions).
Definition: GlobalValue.h:56
Same for multiplication.
Definition: ISDOpcodes.h:257
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate, that is the target has add instructions which can add a register and the immediate without having to materialize the immediate into a register.
static const int LAST_INDEXED_MODE
Definition: ISDOpcodes.h:922
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1974
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
unsigned getOpcode() const
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:608
SDValue getValue(unsigned R) const
unsigned getInRegsParamsCount() const
bool hasV5TOps() const
Definition: ARMSubtarget.h:534
unsigned MaxStoresPerMemcpy
Specify maximum bytes of store instructions per memcpy call.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:346
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:309
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.setjmp intrinsic.
Definition: ISDOpcodes.h:108
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const
PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:366
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:175
bool hasOptionalDef(QueryType Type=IgnoreBundle) const
Set if this instruction has an optional definition, e.g.
Definition: MachineInstr.h:613
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:457
bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const override
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1722
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
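A minimal sketch (assuming DAG and an SDLoc dl; names are illustrative):
  SDValue Ten = DAG.getConstant(10, dl, MVT::i32); // i32 ConstantSDNode holding 10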
SDValue getCondCode(ISD::CondCode Cond)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
bool isRegLoc() const
static bool isGTorGE(ISD::CondCode CC)
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:185
bool isTargetMachO() const
Definition: ARMSubtarget.h:658
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:291
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned char TargetFlags=0) const
const MachinePointerInfo & getPointerInfo() const
static SDValue PerformVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:345
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
MachineConstantPoolValue * getMachineCPVal() const
static SDValue PerformORCombineToBFI(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static RTLIB::Libcall getDivRemLibcall(const SDNode *N, MVT::SimpleValueType SVT)
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructions which can compare a register against the immediate without having to materialize the immediate into a register.
bool hasAtomicStore() const
Return true if this atomic instruction stores to memory.
static bool isVTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of "vector_shuffle v...
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG)
void insert(iterator MBBI, MachineBasicBlock *MBB)
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
void setReturnAddressIsTaken(bool s)
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
void setMinStackArgumentAlignment(unsigned Align)
Set the minimum stack alignment of an argument (in log2(bytes)).
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock *BB, const TargetRegisterInfo *TRI)
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:56
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
CallLoweringInfo & setInRegister(bool Value=true)
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
ArrayRef< int > getMask() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
LLVM Value Representation.
Definition: Value.h:73
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:302
SDValue getRegister(unsigned Reg, EVT VT)
bool supportsTailCall() const
Definition: ARMSubtarget.h:743
unsigned getResNo() const
get the index which selects a specific result in the SDNode
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:606
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2)
Return the load opcode for a given load size.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
void push_back(MachineBasicBlock *MBB)
bool hasVFP4() const
Definition: ARMSubtarget.h:569
static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG)
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:962
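A minimal sketch (assuming DAG, dl, and i32 SDValues LHS, RHS, TVal, FVal; names are illustrative):
  SDValue Sel = DAG.getSelectCC(dl, LHS, RHS, TVal, FVal, ISD::SETGT); // TVal when LHS > RHS (signed), else FVal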
ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI)
SDValue getValueType(EVT)
KnownBits sext(unsigned BitWidth)
Sign extends the underlying known Zero and One bits.
Definition: KnownBits.h:125
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:175
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
int getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:776
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which...
Definition: ARMBaseInfo.h:279
bool isUndef() const
Return true if the type of the node type undefined.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:331
bool isRWPI() const
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target...
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1124
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:703
static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
unsigned getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG)
void rewindByValRegsInfo()
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone...
bool isTargetWindows() const
Definition: ARMSubtarget.h:654
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock *> &DestBBs)
createJumpTableIndex - Create a new jump table.
Type * getElementType() const
Definition: DerivedTypes.h:360
static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformSTORECombine - Target-specific dag combine xforms for ISD::STORE.
const APFloat & getValueAPF() const
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG)
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:417
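Illustrative values:
  bool A = isShiftedMask_32(0x00FF0000); // true: one contiguous run of ones
  bool B = isShiftedMask_32(0x00FF00F0); // false: two separate runs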
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:443
void RemoveOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
bool isOSVersionLT(unsigned Major, unsigned Minor=0, unsigned Micro=0) const
isOSVersionLT - Helper function for doing comparisons against version numbers included in the target ...
Definition: Triple.h:408
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
unsigned MaxStoresPerMemsetOptSize
Maximum number of store operations that may be substituted for the call to memset, used for functions with OptSize attribute.
Type * getArrayElementType() const
Definition: Type.h:365
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
unsigned countLeadingZeros() const
The APInt version of the countLeadingZeros functions in MathExtras.h.
Definition: APInt.h:1596
static BranchProbability getZero()
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
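A minimal sketch (assuming DAG and an SDValue Op; names are illustrative):
  KnownBits Known = DAG.computeKnownBits(Op);
  bool SignBitClear = Known.Zero.isSignBitSet(); // true when the sign bit is provably zero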
static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget, SelectionDAG &DAG)
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1967
unsigned getNumOperands() const
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:198
bool hasAnyDataBarrier() const
Definition: ARMSubtarget.h:591
static bool isVolatile(Instruction *Inst)
Conversion operators.
Definition: ISDOpcodes.h:465
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
const SDValue & getOperand(unsigned i) const
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:789
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
unsigned getLocReg() const
uint64_t getZExtValue() const
bool isBigEndian() const
Definition: DataLayout.h:222
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:474
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:126
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:393
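Illustrative values:
  bool Fits = isUIntN(12, 4095); // true: 4095 fits in 12 unsigned bits
  bool Over = isUIntN(12, 4096); // false: needs 13 bits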
bool isBitFieldInvertedMask(unsigned v)
static SDValue CombineANDShift(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
#define LLVM_DEBUG(X)
Definition: Debug.h:123
unsigned AllocateReg(unsigned Reg)
AllocateReg - Attempt to allocate one register.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass...
bool isAAPCS_ABI() const
bool hasDataBarrier() const
Definition: ARMSubtarget.h:586
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand *> NewMemRefs)
Mutate the specified machine node&#39;s memory references to the provided list.
Instruction * emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1238
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Definition: CallSite.h:271
static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
ExpandBITCAST - If the target supports VFP, this function is called to expand a bit convert where eit...
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns true if the given (atomic) store should be expanded by the IR-level AtomicExpand pass into an...
static SDValue AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:150
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:584
VectorType * getType() const
Overload to return most specific vector type.
static bool isSaturatingConditional(const SDValue &Op, SDValue &V, uint64_t &K, bool &usat)
Value * getPointerOperand()
Definition: Instructions.h:413
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:174
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:785
LLVMContext * getContext() const
Definition: SelectionDAG.h:407
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:375
static bool isSplatMask(const int *Mask, EVT VT)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one...
static bool isVEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseVEXT, unsigned &Imm)
static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, unsigned AlignCheck)
Type * getElementType() const
Definition: DerivedTypes.h:486
bool hasThumb2() const
Definition: ARMSubtarget.h:715
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
std::vector< MachineBasicBlock * >::iterator succ_iterator
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG)
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:242
static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
IntegerType * Int32Ty
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:221
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
Definition: MCInstrDesc.h:581
This file describes how to lower LLVM code to machine code.
const BasicBlock * getParent() const
Definition: Instruction.h:67
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:622
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align, bool *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
FloatABI::ABIType FloatABIType
FloatABIType - This setting is set when the -float-abi=xxx option is specified on the command line...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:914
static TargetLowering::ArgListTy getDivRemArgList(const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget)
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:380
static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, bool is128Bits, NEONModImmType type)
isNEONModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
bool hasFullFP16() const
Definition: ARMSubtarget.h:637
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
This class is used to represent ISD::LOAD nodes.
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary...
Definition: ISDOpcodes.h:623