LLVM 8.0.1
AArch64FastISel.cpp
1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the AArch64-specific support for the FastISel class. Some
11 // of the target-specific code is generated by tablegen in the file
12 // AArch64GenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AArch64.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/CodeGen/FastISel.h"
40 #include "llvm/IR/Argument.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/BasicBlock.h"
43 #include "llvm/IR/CallingConv.h"
44 #include "llvm/IR/Constant.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/DataLayout.h"
47 #include "llvm/IR/DerivedTypes.h"
48 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/Intrinsics.h"
56 #include "llvm/IR/Operator.h"
57 #include "llvm/IR/Type.h"
58 #include "llvm/IR/User.h"
59 #include "llvm/IR/Value.h"
60 #include "llvm/MC/MCInstrDesc.h"
61 #include "llvm/MC/MCRegisterInfo.h"
62 #include "llvm/MC/MCSymbol.h"
64 #include "llvm/Support/Casting.h"
65 #include "llvm/Support/CodeGen.h"
66 #include "llvm/Support/Compiler.h"
70 #include <algorithm>
71 #include <cassert>
72 #include <cstdint>
73 #include <iterator>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 namespace {
79 
80 class AArch64FastISel final : public FastISel {
81  class Address {
82  public:
83  using BaseKind = enum {
84  RegBase,
85  FrameIndexBase
86  };
87 
88  private:
89  BaseKind Kind = RegBase;
90  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
91  union {
92  unsigned Reg;
93  int FI;
94  } Base;
95  unsigned OffsetReg = 0;
96  unsigned Shift = 0;
97  int64_t Offset = 0;
98  const GlobalValue *GV = nullptr;
99 
100  public:
101  Address() { Base.Reg = 0; }
102 
103  void setKind(BaseKind K) { Kind = K; }
104  BaseKind getKind() const { return Kind; }
105  void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
106  AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
107  bool isRegBase() const { return Kind == RegBase; }
108  bool isFIBase() const { return Kind == FrameIndexBase; }
109 
110  void setReg(unsigned Reg) {
111  assert(isRegBase() && "Invalid base register access!");
112  Base.Reg = Reg;
113  }
114 
115  unsigned getReg() const {
116  assert(isRegBase() && "Invalid base register access!");
117  return Base.Reg;
118  }
119 
120  void setOffsetReg(unsigned Reg) {
121  OffsetReg = Reg;
122  }
123 
124  unsigned getOffsetReg() const {
125  return OffsetReg;
126  }
127 
128  void setFI(unsigned FI) {
129  assert(isFIBase() && "Invalid base frame index access!");
130  Base.FI = FI;
131  }
132 
133  unsigned getFI() const {
134  assert(isFIBase() && "Invalid base frame index access!");
135  return Base.FI;
136  }
137 
138  void setOffset(int64_t O) { Offset = O; }
139  int64_t getOffset() { return Offset; }
140  void setShift(unsigned S) { Shift = S; }
141  unsigned getShift() { return Shift; }
142 
143  void setGlobalValue(const GlobalValue *G) { GV = G; }
144  const GlobalValue *getGlobalValue() { return GV; }
145  };
146 
147  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
148  /// make the right decision when generating code for different targets.
149  const AArch64Subtarget *Subtarget;
150  LLVMContext *Context;
151 
152  bool fastLowerArguments() override;
153  bool fastLowerCall(CallLoweringInfo &CLI) override;
154  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
155 
156 private:
157  // Selection routines.
158  bool selectAddSub(const Instruction *I);
159  bool selectLogicalOp(const Instruction *I);
160  bool selectLoad(const Instruction *I);
161  bool selectStore(const Instruction *I);
162  bool selectBranch(const Instruction *I);
163  bool selectIndirectBr(const Instruction *I);
164  bool selectCmp(const Instruction *I);
165  bool selectSelect(const Instruction *I);
166  bool selectFPExt(const Instruction *I);
167  bool selectFPTrunc(const Instruction *I);
168  bool selectFPToInt(const Instruction *I, bool Signed);
169  bool selectIntToFP(const Instruction *I, bool Signed);
170  bool selectRem(const Instruction *I, unsigned ISDOpcode);
171  bool selectRet(const Instruction *I);
172  bool selectTrunc(const Instruction *I);
173  bool selectIntExt(const Instruction *I);
174  bool selectMul(const Instruction *I);
175  bool selectShift(const Instruction *I);
176  bool selectBitCast(const Instruction *I);
177  bool selectFRem(const Instruction *I);
178  bool selectSDiv(const Instruction *I);
179  bool selectGetElementPtr(const Instruction *I);
180  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
181 
182  // Utility helper routines.
183  bool isTypeLegal(Type *Ty, MVT &VT);
184  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
185  bool isValueAvailable(const Value *V) const;
186  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
187  bool computeCallAddress(const Value *V, Address &Addr);
188  bool simplifyAddress(Address &Addr, MVT VT);
189  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
190  MachineMemOperand::Flags Flags,
191  unsigned ScaleFactor, MachineMemOperand *MMO);
192  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
193  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
194  unsigned Alignment);
195  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
196  const Value *Cond);
197  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
198  bool optimizeSelect(const SelectInst *SI);
199  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
200 
201  // Emit helper routines.
202  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
203  const Value *RHS, bool SetFlags = false,
204  bool WantResult = true, bool IsZExt = false);
205  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
206  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
207  bool SetFlags = false, bool WantResult = true);
208  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
209  bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
210  bool WantResult = true);
211  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
212  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
213  AArch64_AM::ShiftExtendType ShiftType,
214  uint64_t ShiftImm, bool SetFlags = false,
215  bool WantResult = true);
216  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
217  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
218  AArch64_AM::ShiftExtendType ExtType,
219  uint64_t ShiftImm, bool SetFlags = false,
220  bool WantResult = true);
221 
222  // Emit functions.
223  bool emitCompareAndBranch(const BranchInst *BI);
224  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
225  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
226  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
227  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
228  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
229  MachineMemOperand *MMO = nullptr);
230  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
231  MachineMemOperand *MMO = nullptr);
232  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
233  MachineMemOperand *MMO = nullptr);
234  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
235  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
236  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
237  bool SetFlags = false, bool WantResult = true,
238  bool IsZExt = false);
239  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
240  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
241  bool SetFlags = false, bool WantResult = true,
242  bool IsZExt = false);
243  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
244  unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
245  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
246  unsigned RHSReg, bool RHSIsKill,
247  AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
248  bool WantResult = true);
249  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
250  const Value *RHS);
251  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
252  bool LHSIsKill, uint64_t Imm);
253  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
254  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
255  uint64_t ShiftImm);
256  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
257  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
258  unsigned Op1, bool Op1IsKill);
259  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
260  unsigned Op1, bool Op1IsKill);
261  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
262  unsigned Op1, bool Op1IsKill);
263  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
264  unsigned Op1Reg, bool Op1IsKill);
265  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
266  uint64_t Imm, bool IsZExt = true);
267  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
268  unsigned Op1Reg, bool Op1IsKill);
269  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
270  uint64_t Imm, bool IsZExt = true);
271  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
272  unsigned Op1Reg, bool Op1IsKill);
273  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
274  uint64_t Imm, bool IsZExt = false);
275 
276  unsigned materializeInt(const ConstantInt *CI, MVT VT);
277  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
278  unsigned materializeGV(const GlobalValue *GV);
279 
280  // Call handling routines.
281 private:
282  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
283  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
284  unsigned &NumBytes);
285  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
286 
287 public:
288  // Backend specific FastISel code.
289  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
290  unsigned fastMaterializeConstant(const Constant *C) override;
291  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
292 
293  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
294  const TargetLibraryInfo *LibInfo)
295  : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
296  Subtarget =
297  &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
298  Context = &FuncInfo.Fn->getContext();
299  }
300 
301  bool fastSelectInstruction(const Instruction *I) override;
302 
303 #include "AArch64GenFastISel.inc"
304 };
305 
306 } // end anonymous namespace
307 
308 #include "AArch64GenCallingConv.inc"
309 
310 /// Check if the sign-/zero-extend will be a noop.
311 static bool isIntExtFree(const Instruction *I) {
312  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
313  "Unexpected integer extend instruction.");
314  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
315  "Unexpected value type.");
316  bool IsZExt = isa<ZExtInst>(I);
317 
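 // A single-use load feeding the extend is selected together with it as one
 // extending load (e.g. LDRB/LDRH/LDRSW), so the extend itself costs nothing.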
318  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
319  if (LI->hasOneUse())
320  return true;
321 
322  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
323  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
324  return true;
325 
326  return false;
327 }
328 
329 /// Determine the implicit scale factor that is applied by a memory
330 /// operation for a given value type.
331 static unsigned getImplicitScaleFactor(MVT VT) {
332  switch (VT.SimpleTy) {
333  default:
334  return 0; // invalid
335  case MVT::i1: // fall-through
336  case MVT::i8:
337  return 1;
338  case MVT::i16:
339  return 2;
340  case MVT::i32: // fall-through
341  case MVT::f32:
342  return 4;
343  case MVT::i64: // fall-through
344  case MVT::f64:
345  return 8;
346  }
347 }
348 
349 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
350  if (CC == CallingConv::WebKit_JS)
351  return CC_AArch64_WebKit_JS;
352  if (CC == CallingConv::GHC)
353  return CC_AArch64_GHC;
354  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
355 }
356 
357 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
358  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
359  "Alloca should always return a pointer.");
360 
361  // Don't handle dynamic allocas.
362  if (!FuncInfo.StaticAllocaMap.count(AI))
363  return 0;
364 
365  DenseMap<const AllocaInst *, int>::iterator SI =
366  FuncInfo.StaticAllocaMap.find(AI);
367 
368  if (SI != FuncInfo.StaticAllocaMap.end()) {
369  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
370  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
371  ResultReg)
372  .addFrameIndex(SI->second)
373  .addImm(0)
374  .addImm(0);
375  return ResultReg;
376  }
377 
378  return 0;
379 }
380 
381 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
382  if (VT > MVT::i64)
383  return 0;
384 
385  if (!CI->isZero())
386  return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
387 
388  // Create a copy from the zero register to materialize a "0" value.
389  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
390  : &AArch64::GPR32RegClass;
391  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
392  unsigned ResultReg = createResultReg(RC);
393  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
394  ResultReg).addReg(ZeroReg, getKillRegState(true));
395  return ResultReg;
396 }
397 
398 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
399  // Positive zero (+0.0) has to be materialized with a fmov from the zero
400  // register, because the immediate version of fmov cannot encode zero.
401  if (CFP->isNullValue())
402  return fastMaterializeFloatZero(CFP);
403 
404  if (VT != MVT::f32 && VT != MVT::f64)
405  return 0;
406 
407  const APFloat Val = CFP->getValueAPF();
408  bool Is64Bit = (VT == MVT::f64);
409  // Check whether an FMOV instruction can materialize the constant directly;
410  // otherwise it has to be loaded from the constant pool.
411  if (TLI.isFPImmLegal(Val, VT)) {
412  int Imm =
413  Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
414  assert((Imm != -1) && "Cannot encode floating-point constant.");
415  unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
416  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
417  }
418 
419  // For the MachO large code model materialize the FP constant in code.
420  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
421  unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
422  const TargetRegisterClass *RC = Is64Bit ?
423  &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
424 
425  unsigned TmpReg = createResultReg(RC);
426  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
427  .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
428 
429  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
430  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
431  TII.get(TargetOpcode::COPY), ResultReg)
432  .addReg(TmpReg, getKillRegState(true));
433 
434  return ResultReg;
435  }
436 
437  // Materialize via constant pool. MachineConstantPool wants an explicit
438  // alignment.
439  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
440  if (Align == 0)
441  Align = DL.getTypeAllocSize(CFP->getType());
442 
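 // ADRP materializes the 4 KiB page address of the constant-pool entry; the
 // following load applies the low 12 bits as the page offset.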
443  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
444  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
445  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
446  ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
447 
448  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
449  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
450  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
451  .addReg(ADRPReg)
452  .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
453  return ResultReg;
454 }
455 
456 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
457  // We can't handle thread-local variables quickly yet.
458  if (GV->isThreadLocal())
459  return 0;
460 
461  // MachO still uses GOT for large code-model accesses, but ELF requires
462  // movz/movk sequences, which FastISel doesn't handle yet.
463  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
464  return 0;
465 
466  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
467 
468  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
469  if (!DestEVT.isSimple())
470  return 0;
471 
472  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
473  unsigned ResultReg;
474 
475  if (OpFlags & AArch64II::MO_GOT) {
476  // ADRP + LDRX
477  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
478  ADRPReg)
479  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
480 
481  ResultReg = createResultReg(&AArch64::GPR64RegClass);
482  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
483  ResultReg)
484  .addReg(ADRPReg)
485  .addGlobalAddress(GV, 0,
486  AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
487  } else {
488  // ADRP + ADDX
489  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
490  ADRPReg)
491  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
492 
493  ResultReg = createResultReg(&AArch64::GPR64spRegClass);
494  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
495  ResultReg)
496  .addReg(ADRPReg)
497  .addGlobalAddress(GV, 0,
498  AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
499  .addImm(0);
500  }
501  return ResultReg;
502 }
503 
504 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
505  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
506 
507  // Only handle simple types.
508  if (!CEVT.isSimple())
509  return 0;
510  MVT VT = CEVT.getSimpleVT();
511 
512  if (const auto *CI = dyn_cast<ConstantInt>(C))
513  return materializeInt(CI, VT);
514  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
515  return materializeFP(CFP, VT);
516  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
517  return materializeGV(GV);
518 
519  return 0;
520 }
521 
522 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
523  assert(CFP->isNullValue() &&
524  "Floating-point constant is not a positive zero.");
525  MVT VT;
526  if (!isTypeLegal(CFP->getType(), VT))
527  return 0;
528 
529  if (VT != MVT::f32 && VT != MVT::f64)
530  return 0;
531 
532  bool Is64Bit = (VT == MVT::f64);
533  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
534  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
535  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
536 }
537 
538 /// Check if the multiply is by a power-of-2 constant.
539 static bool isMulPowOf2(const Value *I) {
540  if (const auto *MI = dyn_cast<MulOperator>(I)) {
541  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
542  if (C->getValue().isPowerOf2())
543  return true;
544  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
545  if (C->getValue().isPowerOf2())
546  return true;
547  }
548  return false;
549 }
550 
551 // Computes the address to get to an object.
552 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
553 {
554  const User *U = nullptr;
555  unsigned Opcode = Instruction::UserOp1;
556  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
557  // Don't walk into other basic blocks unless the object is an alloca from
558  // another block; otherwise it may not have a virtual register assigned.
559  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
560  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
561  Opcode = I->getOpcode();
562  U = I;
563  }
564  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
565  Opcode = C->getOpcode();
566  U = C;
567  }
568 
569  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
570  if (Ty->getAddressSpace() > 255)
571  // Fast instruction selection doesn't support the special
572  // address spaces.
573  return false;
574 
575  switch (Opcode) {
576  default:
577  break;
578  case Instruction::BitCast:
579  // Look through bitcasts.
580  return computeAddress(U->getOperand(0), Addr, Ty);
581 
582  case Instruction::IntToPtr:
583  // Look past no-op inttoptrs.
584  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
585  TLI.getPointerTy(DL))
586  return computeAddress(U->getOperand(0), Addr, Ty);
587  break;
588 
589  case Instruction::PtrToInt:
590  // Look past no-op ptrtoints.
591  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
592  return computeAddress(U->getOperand(0), Addr, Ty);
593  break;
594 
595  case Instruction::GetElementPtr: {
596  Address SavedAddr = Addr;
597  uint64_t TmpOffset = Addr.getOffset();
598 
599  // Iterate through the GEP folding the constants into offsets where
600  // we can.
601  for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
602  GTI != E; ++GTI) {
603  const Value *Op = GTI.getOperand();
604  if (StructType *STy = GTI.getStructTypeOrNull()) {
605  const StructLayout *SL = DL.getStructLayout(STy);
606  unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
607  TmpOffset += SL->getElementOffset(Idx);
608  } else {
609  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
610  while (true) {
611  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
612  // Constant-offset addressing.
613  TmpOffset += CI->getSExtValue() * S;
614  break;
615  }
616  if (canFoldAddIntoGEP(U, Op)) {
617  // A compatible add with a constant operand. Fold the constant.
618  ConstantInt *CI =
619  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
620  TmpOffset += CI->getSExtValue() * S;
621  // Iterate on the other operand.
622  Op = cast<AddOperator>(Op)->getOperand(0);
623  continue;
624  }
625  // Unsupported
626  goto unsupported_gep;
627  }
628  }
629  }
630 
631  // Try to grab the base operand now.
632  Addr.setOffset(TmpOffset);
633  if (computeAddress(U->getOperand(0), Addr, Ty))
634  return true;
635 
636  // We failed, restore everything and try the other options.
637  Addr = SavedAddr;
638 
639  unsupported_gep:
640  break;
641  }
642  case Instruction::Alloca: {
643  const AllocaInst *AI = cast<AllocaInst>(Obj);
644  DenseMap<const AllocaInst *, int>::iterator SI =
645  FuncInfo.StaticAllocaMap.find(AI);
646  if (SI != FuncInfo.StaticAllocaMap.end()) {
647  Addr.setKind(Address::FrameIndexBase);
648  Addr.setFI(SI->second);
649  return true;
650  }
651  break;
652  }
653  case Instruction::Add: {
654  // Adds of constants are common and easy enough.
655  const Value *LHS = U->getOperand(0);
656  const Value *RHS = U->getOperand(1);
657 
658  if (isa<ConstantInt>(LHS))
659  std::swap(LHS, RHS);
660 
661  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
662  Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
663  return computeAddress(LHS, Addr, Ty);
664  }
665 
666  Address Backup = Addr;
667  if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
668  return true;
669  Addr = Backup;
670 
671  break;
672  }
673  case Instruction::Sub: {
674  // Subs of constants are common and easy enough.
675  const Value *LHS = U->getOperand(0);
676  const Value *RHS = U->getOperand(1);
677 
678  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
679  Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
680  return computeAddress(LHS, Addr, Ty);
681  }
682  break;
683  }
684  case Instruction::Shl: {
685  if (Addr.getOffsetReg())
686  break;
687 
688  const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
689  if (!CI)
690  break;
691 
692  unsigned Val = CI->getZExtValue();
693  if (Val < 1 || Val > 3)
694  break;
695 
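 // The register-offset addressing mode can only scale the index by the access
 // size, so the shl is foldable only when its shift amount equals log2 of the
 // accessed type's size in bytes.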
696  uint64_t NumBytes = 0;
697  if (Ty && Ty->isSized()) {
698  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
699  NumBytes = NumBits / 8;
700  if (!isPowerOf2_64(NumBits))
701  NumBytes = 0;
702  }
703 
704  if (NumBytes != (1ULL << Val))
705  break;
706 
707  Addr.setShift(Val);
708  Addr.setExtendType(AArch64_AM::LSL);
709 
710  const Value *Src = U->getOperand(0);
711  if (const auto *I = dyn_cast<Instruction>(Src)) {
712  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
713  // Fold the zext or sext when it won't become a noop.
714  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
715  if (!isIntExtFree(ZE) &&
716  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
717  Addr.setExtendType(AArch64_AM::UXTW);
718  Src = ZE->getOperand(0);
719  }
720  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
721  if (!isIntExtFree(SE) &&
722  SE->getOperand(0)->getType()->isIntegerTy(32)) {
723  Addr.setExtendType(AArch64_AM::SXTW);
724  Src = SE->getOperand(0);
725  }
726  }
727  }
728  }
729 
730  if (const auto *AI = dyn_cast<BinaryOperator>(Src))
731  if (AI->getOpcode() == Instruction::And) {
732  const Value *LHS = AI->getOperand(0);
733  const Value *RHS = AI->getOperand(1);
734 
735  if (const auto *C = dyn_cast<ConstantInt>(LHS))
736  if (C->getValue() == 0xffffffff)
737  std::swap(LHS, RHS);
738 
739  if (const auto *C = dyn_cast<ConstantInt>(RHS))
740  if (C->getValue() == 0xffffffff) {
741  Addr.setExtendType(AArch64_AM::UXTW);
742  unsigned Reg = getRegForValue(LHS);
743  if (!Reg)
744  return false;
745  bool RegIsKill = hasTrivialKill(LHS);
746  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
747  AArch64::sub_32);
748  Addr.setOffsetReg(Reg);
749  return true;
750  }
751  }
752 
753  unsigned Reg = getRegForValue(Src);
754  if (!Reg)
755  return false;
756  Addr.setOffsetReg(Reg);
757  return true;
758  }
759  case Instruction::Mul: {
760  if (Addr.getOffsetReg())
761  break;
762 
763  if (!isMulPowOf2(U))
764  break;
765 
766  const Value *LHS = U->getOperand(0);
767  const Value *RHS = U->getOperand(1);
768 
769  // Canonicalize power-of-2 value to the RHS.
770  if (const auto *C = dyn_cast<ConstantInt>(LHS))
771  if (C->getValue().isPowerOf2())
772  std::swap(LHS, RHS);
773 
774  assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
775  const auto *C = cast<ConstantInt>(RHS);
776  unsigned Val = C->getValue().logBase2();
777  if (Val < 1 || Val > 3)
778  break;
779 
780  uint64_t NumBytes = 0;
781  if (Ty && Ty->isSized()) {
782  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
783  NumBytes = NumBits / 8;
784  if (!isPowerOf2_64(NumBits))
785  NumBytes = 0;
786  }
787 
788  if (NumBytes != (1ULL << Val))
789  break;
790 
791  Addr.setShift(Val);
792  Addr.setExtendType(AArch64_AM::LSL);
793 
794  const Value *Src = LHS;
795  if (const auto *I = dyn_cast<Instruction>(Src)) {
796  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
797  // Fold the zext or sext when it won't become a noop.
798  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
799  if (!isIntExtFree(ZE) &&
800  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
801  Addr.setExtendType(AArch64_AM::UXTW);
802  Src = ZE->getOperand(0);
803  }
804  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
805  if (!isIntExtFree(SE) &&
806  SE->getOperand(0)->getType()->isIntegerTy(32)) {
807  Addr.setExtendType(AArch64_AM::SXTW);
808  Src = SE->getOperand(0);
809  }
810  }
811  }
812  }
813 
814  unsigned Reg = getRegForValue(Src);
815  if (!Reg)
816  return false;
817  Addr.setOffsetReg(Reg);
818  return true;
819  }
820  case Instruction::And: {
821  if (Addr.getOffsetReg())
822  break;
823 
824  if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
825  break;
826 
827  const Value *LHS = U->getOperand(0);
828  const Value *RHS = U->getOperand(1);
829 
830  if (const auto *C = dyn_cast<ConstantInt>(LHS))
831  if (C->getValue() == 0xffffffff)
832  std::swap(LHS, RHS);
833 
834  if (const auto *C = dyn_cast<ConstantInt>(RHS))
835  if (C->getValue() == 0xffffffff) {
836  Addr.setShift(0);
837  Addr.setExtendType(AArch64_AM::LSL);
838  Addr.setExtendType(AArch64_AM::UXTW);
839 
840  unsigned Reg = getRegForValue(LHS);
841  if (!Reg)
842  return false;
843  bool RegIsKill = hasTrivialKill(LHS);
844  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
845  AArch64::sub_32);
846  Addr.setOffsetReg(Reg);
847  return true;
848  }
849  break;
850  }
851  case Instruction::SExt:
852  case Instruction::ZExt: {
853  if (!Addr.getReg() || Addr.getOffsetReg())
854  break;
855 
856  const Value *Src = nullptr;
857  // Fold the zext or sext when it won't become a noop.
858  if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
859  if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
860  Addr.setExtendType(AArch64_AM::UXTW);
861  Src = ZE->getOperand(0);
862  }
863  } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
864  if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
865  Addr.setExtendType(AArch64_AM::SXTW);
866  Src = SE->getOperand(0);
867  }
868  }
869 
870  if (!Src)
871  break;
872 
873  Addr.setShift(0);
874  unsigned Reg = getRegForValue(Src);
875  if (!Reg)
876  return false;
877  Addr.setOffsetReg(Reg);
878  return true;
879  }
880  } // end switch
881 
882  if (Addr.isRegBase() && !Addr.getReg()) {
883  unsigned Reg = getRegForValue(Obj);
884  if (!Reg)
885  return false;
886  Addr.setReg(Reg);
887  return true;
888  }
889 
890  if (!Addr.getOffsetReg()) {
891  unsigned Reg = getRegForValue(Obj);
892  if (!Reg)
893  return false;
894  Addr.setOffsetReg(Reg);
895  return true;
896  }
897 
898  return false;
899 }
900 
901 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
902  const User *U = nullptr;
903  unsigned Opcode = Instruction::UserOp1;
904  bool InMBB = true;
905 
906  if (const auto *I = dyn_cast<Instruction>(V)) {
907  Opcode = I->getOpcode();
908  U = I;
909  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
910  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
911  Opcode = C->getOpcode();
912  U = C;
913  }
914 
915  switch (Opcode) {
916  default: break;
917  case Instruction::BitCast:
918  // Look past a bitcast if its operand is in the same BB.
919  if (InMBB)
920  return computeCallAddress(U->getOperand(0), Addr);
921  break;
922  case Instruction::IntToPtr:
923  // Look past a no-op inttoptr if its operand is in the same BB.
924  if (InMBB &&
925  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
926  TLI.getPointerTy(DL))
927  return computeCallAddress(U->getOperand(0), Addr);
928  break;
929  case Instruction::PtrToInt:
930  // Look past a no-op ptrtoint if its operand is in the same BB.
931  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
932  return computeCallAddress(U->getOperand(0), Addr);
933  break;
934  }
935 
936  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
937  Addr.setGlobalValue(GV);
938  return true;
939  }
940 
941  // If all else fails, try to materialize the value in a register.
942  if (!Addr.getGlobalValue()) {
943  Addr.setReg(getRegForValue(V));
944  return Addr.getReg() != 0;
945  }
946 
947  return false;
948 }
949 
950 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
951  EVT evt = TLI.getValueType(DL, Ty, true);
952 
953  // Only handle simple types.
954  if (evt == MVT::Other || !evt.isSimple())
955  return false;
956  VT = evt.getSimpleVT();
957 
958  // This is a legal type, but it's not something we handle in fast-isel.
959  if (VT == MVT::f128)
960  return false;
961 
962  // Handle all other legal types, i.e. a register that will directly hold this
963  // value.
964  return TLI.isTypeLegal(VT);
965 }
966 
967 /// Determine if the value type is supported by FastISel.
968 ///
969 /// FastISel for AArch64 can handle more value types than are legal. This adds
970 /// simple value types such as i1, i8, and i16.
971 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
972  if (Ty->isVectorTy() && !IsVectorAllowed)
973  return false;
974 
975  if (isTypeLegal(Ty, VT))
976  return true;
977 
978  // If this is a type that can be sign- or zero-extended to a basic operation,
979  // go ahead and accept it now.
980  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
981  return true;
982 
983  return false;
984 }
985 
986 bool AArch64FastISel::isValueAvailable(const Value *V) const {
987  if (!isa<Instruction>(V))
988  return true;
989 
990  const auto *I = cast<Instruction>(V);
991  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
992 }
993 
994 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
995  unsigned ScaleFactor = getImplicitScaleFactor(VT);
996  if (!ScaleFactor)
997  return false;
998 
999  bool ImmediateOffsetNeedsLowering = false;
1000  bool RegisterOffsetNeedsLowering = false;
1001  int64_t Offset = Addr.getOffset();
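 // Unscaled loads/stores (LDUR/STUR) take a signed 9-bit offset; the scaled
 // forms (LDR/STR with unsigned offset) take a 12-bit unsigned multiple of the
 // access size. Anything else has to be lowered into an explicit add below.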
1002  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1003  ImmediateOffsetNeedsLowering = true;
1004  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1005  !isUInt<12>(Offset / ScaleFactor))
1006  ImmediateOffsetNeedsLowering = true;
1007 
1008  // Cannot encode an offset register and an immediate offset in the same
1009  // instruction. Fold the immediate offset into the load/store instruction and
1010  // emit an additional add to take care of the offset register.
1011  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1012  RegisterOffsetNeedsLowering = true;
1013 
1014  // Cannot encode zero register as base.
1015  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1016  RegisterOffsetNeedsLowering = true;
1017 
1018  // If this is a stack pointer and the offset needs to be simplified then put
1019  // the alloca address into a register, set the base type back to register and
1020  // continue. This should almost never happen.
1021  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1022  {
1023  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1024  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1025  ResultReg)
1026  .addFrameIndex(Addr.getFI())
1027  .addImm(0)
1028  .addImm(0);
1029  Addr.setKind(Address::RegBase);
1030  Addr.setReg(ResultReg);
1031  }
1032 
1033  if (RegisterOffsetNeedsLowering) {
1034  unsigned ResultReg = 0;
1035  if (Addr.getReg()) {
1036  if (Addr.getExtendType() == AArch64_AM::SXTW ||
1037  Addr.getExtendType() == AArch64_AM::UXTW )
1038  ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1039  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1040  /*TODO:IsKill=*/false, Addr.getExtendType(),
1041  Addr.getShift());
1042  else
1043  ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1044  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1045  /*TODO:IsKill=*/false, AArch64_AM::LSL,
1046  Addr.getShift());
1047  } else {
1048  if (Addr.getExtendType() == AArch64_AM::UXTW)
1049  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1050  /*Op0IsKill=*/false, Addr.getShift(),
1051  /*IsZExt=*/true);
1052  else if (Addr.getExtendType() == AArch64_AM::SXTW)
1053  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1054  /*Op0IsKill=*/false, Addr.getShift(),
1055  /*IsZExt=*/false);
1056  else
1057  ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1058  /*Op0IsKill=*/false, Addr.getShift());
1059  }
1060  if (!ResultReg)
1061  return false;
1062 
1063  Addr.setReg(ResultReg);
1064  Addr.setOffsetReg(0);
1065  Addr.setShift(0);
1066  Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1067  }
1068 
1069  // Since the offset is too large for the load/store instruction, get the
1070  // reg+offset into a register.
1071  if (ImmediateOffsetNeedsLowering) {
1072  unsigned ResultReg;
1073  if (Addr.getReg())
1074  // Try to fold the immediate into the add instruction.
1075  ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1076  else
1077  ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1078 
1079  if (!ResultReg)
1080  return false;
1081  Addr.setReg(ResultReg);
1082  Addr.setOffset(0);
1083  }
1084  return true;
1085 }
1086 
1087 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1088  const MachineInstrBuilder &MIB,
1089  MachineMemOperand::Flags Flags,
1090  unsigned ScaleFactor,
1091  MachineMemOperand *MMO) {
1092  int64_t Offset = Addr.getOffset() / ScaleFactor;
1093  // Frame base works a bit differently. Handle it separately.
1094  if (Addr.isFIBase()) {
1095  int FI = Addr.getFI();
1096  // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1097  // and alignment should be based on the VT.
1098  MMO = FuncInfo.MF->getMachineMemOperand(
1099  MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1100  MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1101  // Now add the rest of the operands.
1102  MIB.addFrameIndex(FI).addImm(Offset);
1103  } else {
1104  assert(Addr.isRegBase() && "Unexpected address kind.");
1105  const MCInstrDesc &II = MIB->getDesc();
1106  unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1107  Addr.setReg(
1108  constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1109  Addr.setOffsetReg(
1110  constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1111  if (Addr.getOffsetReg()) {
1112  assert(Addr.getOffset() == 0 && "Unexpected offset");
1113  bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1114  Addr.getExtendType() == AArch64_AM::SXTX;
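 // The register-offset instruction forms take two extra immediates: whether
 // the offset register is sign-extended and whether it is scaled (shifted)
 // by the access size.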
1115  MIB.addReg(Addr.getReg());
1116  MIB.addReg(Addr.getOffsetReg());
1117  MIB.addImm(IsSigned);
1118  MIB.addImm(Addr.getShift() != 0);
1119  } else
1120  MIB.addReg(Addr.getReg()).addImm(Offset);
1121  }
1122 
1123  if (MMO)
1124  MIB.addMemOperand(MMO);
1125 }
1126 
1127 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1128  const Value *RHS, bool SetFlags,
1129  bool WantResult, bool IsZExt) {
1130  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1131  bool NeedExtend = false;
1132  switch (RetVT.SimpleTy) {
1133  default:
1134  return 0;
1135  case MVT::i1:
1136  NeedExtend = true;
1137  break;
1138  case MVT::i8:
1139  NeedExtend = true;
1140  ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1141  break;
1142  case MVT::i16:
1143  NeedExtend = true;
1144  ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1145  break;
1146  case MVT::i32: // fall-through
1147  case MVT::i64:
1148  break;
1149  }
1150  MVT SrcVT = RetVT;
1151  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1152 
1153  // Canonicalize immediates to the RHS first.
1154  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1155  std::swap(LHS, RHS);
1156 
1157  // Canonicalize mul by power of 2 to the RHS.
1158  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1159  if (isMulPowOf2(LHS))
1160  std::swap(LHS, RHS);
1161 
1162  // Canonicalize shift immediate to the RHS.
1163  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1164  if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1165  if (isa<ConstantInt>(SI->getOperand(1)))
1166  if (SI->getOpcode() == Instruction::Shl ||
1167  SI->getOpcode() == Instruction::LShr ||
1168  SI->getOpcode() == Instruction::AShr )
1169  std::swap(LHS, RHS);
1170 
1171  unsigned LHSReg = getRegForValue(LHS);
1172  if (!LHSReg)
1173  return 0;
1174  bool LHSIsKill = hasTrivialKill(LHS);
1175 
1176  if (NeedExtend)
1177  LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1178 
1179  unsigned ResultReg = 0;
1180  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1181  uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1182  if (C->isNegative())
1183  ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1184  SetFlags, WantResult);
1185  else
1186  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1187  WantResult);
1188  } else if (const auto *C = dyn_cast<Constant>(RHS))
1189  if (C->isNullValue())
1190  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1191  WantResult);
1192 
1193  if (ResultReg)
1194  return ResultReg;
1195 
1196  // Only extend the RHS within the instruction if there is a valid extend type.
1197  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1198  isValueAvailable(RHS)) {
1199  if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1200  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1201  if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1202  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1203  if (!RHSReg)
1204  return 0;
1205  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1206  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1207  RHSIsKill, ExtendType, C->getZExtValue(),
1208  SetFlags, WantResult);
1209  }
1210  unsigned RHSReg = getRegForValue(RHS);
1211  if (!RHSReg)
1212  return 0;
1213  bool RHSIsKill = hasTrivialKill(RHS);
1214  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1215  ExtendType, 0, SetFlags, WantResult);
1216  }
1217 
1218  // Check if the mul can be folded into the instruction.
1219  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1220  if (isMulPowOf2(RHS)) {
1221  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1222  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1223 
1224  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1225  if (C->getValue().isPowerOf2())
1226  std::swap(MulLHS, MulRHS);
1227 
1228  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1229  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1230  unsigned RHSReg = getRegForValue(MulLHS);
1231  if (!RHSReg)
1232  return 0;
1233  bool RHSIsKill = hasTrivialKill(MulLHS);
1234  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1235  RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1236  WantResult);
1237  if (ResultReg)
1238  return ResultReg;
1239  }
1240  }
1241 
1242  // Check if the shift can be folded into the instruction.
1243  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1244  if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1245  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1246  AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1247  switch (SI->getOpcode()) {
1248  default: break;
1249  case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1250  case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1251  case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1252  }
1253  uint64_t ShiftVal = C->getZExtValue();
1254  if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1255  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1256  if (!RHSReg)
1257  return 0;
1258  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1259  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1260  RHSIsKill, ShiftType, ShiftVal, SetFlags,
1261  WantResult);
1262  if (ResultReg)
1263  return ResultReg;
1264  }
1265  }
1266  }
1267  }
1268 
1269  unsigned RHSReg = getRegForValue(RHS);
1270  if (!RHSReg)
1271  return 0;
1272  bool RHSIsKill = hasTrivialKill(RHS);
1273 
1274  if (NeedExtend)
1275  RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1276 
1277  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1278  SetFlags, WantResult);
1279 }
1280 
1281 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1282  bool LHSIsKill, unsigned RHSReg,
1283  bool RHSIsKill, bool SetFlags,
1284  bool WantResult) {
1285  assert(LHSReg && RHSReg && "Invalid register number.");
1286 
1287  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1288  RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1289  return 0;
1290 
1291  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1292  return 0;
1293 
1294  static const unsigned OpcTable[2][2][2] = {
1295  { { AArch64::SUBWrr, AArch64::SUBXrr },
1296  { AArch64::ADDWrr, AArch64::ADDXrr } },
1297  { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1298  { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1299  };
1300  bool Is64Bit = RetVT == MVT::i64;
1301  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1302  const TargetRegisterClass *RC =
1303  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1304  unsigned ResultReg;
1305  if (WantResult)
1306  ResultReg = createResultReg(RC);
1307  else
1308  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1309 
1310  const MCInstrDesc &II = TII.get(Opc);
1311  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1312  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1313  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1314  .addReg(LHSReg, getKillRegState(LHSIsKill))
1315  .addReg(RHSReg, getKillRegState(RHSIsKill));
1316  return ResultReg;
1317 }
1318 
1319 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1320  bool LHSIsKill, uint64_t Imm,
1321  bool SetFlags, bool WantResult) {
1322  assert(LHSReg && "Invalid register number.");
1323 
1324  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1325  return 0;
1326 
1327  unsigned ShiftImm;
1328  if (isUInt<12>(Imm))
1329  ShiftImm = 0;
1330  else if ((Imm & 0xfff000) == Imm) {
1331  ShiftImm = 12;
1332  Imm >>= 12;
1333  } else
1334  return 0;
1335 
1336  static const unsigned OpcTable[2][2][2] = {
1337  { { AArch64::SUBWri, AArch64::SUBXri },
1338  { AArch64::ADDWri, AArch64::ADDXri } },
1339  { { AArch64::SUBSWri, AArch64::SUBSXri },
1340  { AArch64::ADDSWri, AArch64::ADDSXri } }
1341  };
1342  bool Is64Bit = RetVT == MVT::i64;
1343  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1344  const TargetRegisterClass *RC;
1345  if (SetFlags)
1346  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1347  else
1348  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1349  unsigned ResultReg;
1350  if (WantResult)
1351  ResultReg = createResultReg(RC);
1352  else
1353  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1354 
1355  const MCInstrDesc &II = TII.get(Opc);
1356  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1357  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1358  .addReg(LHSReg, getKillRegState(LHSIsKill))
1359  .addImm(Imm)
1360  .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1361  return ResultReg;
1362 }
1363 
1364 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1365  bool LHSIsKill, unsigned RHSReg,
1366  bool RHSIsKill,
1367  AArch64_AM::ShiftExtendType ShiftType,
1368  uint64_t ShiftImm, bool SetFlags,
1369  bool WantResult) {
1370  assert(LHSReg && RHSReg && "Invalid register number.");
1371  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1372  RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1373 
1374  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1375  return 0;
1376 
1377  // Don't deal with undefined shifts.
1378  if (ShiftImm >= RetVT.getSizeInBits())
1379  return 0;
1380 
1381  static const unsigned OpcTable[2][2][2] = {
1382  { { AArch64::SUBWrs, AArch64::SUBXrs },
1383  { AArch64::ADDWrs, AArch64::ADDXrs } },
1384  { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1385  { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1386  };
1387  bool Is64Bit = RetVT == MVT::i64;
1388  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1389  const TargetRegisterClass *RC =
1390  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1391  unsigned ResultReg;
1392  if (WantResult)
1393  ResultReg = createResultReg(RC);
1394  else
1395  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1396 
1397  const MCInstrDesc &II = TII.get(Opc);
1398  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1399  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1400  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1401  .addReg(LHSReg, getKillRegState(LHSIsKill))
1402  .addReg(RHSReg, getKillRegState(RHSIsKill))
1403  .addImm(getShifterImm(ShiftType, ShiftImm));
1404  return ResultReg;
1405 }
1406 
1407 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1408  bool LHSIsKill, unsigned RHSReg,
1409  bool RHSIsKill,
1410  AArch64_AM::ShiftExtendType ExtType,
1411  uint64_t ShiftImm, bool SetFlags,
1412  bool WantResult) {
1413  assert(LHSReg && RHSReg && "Invalid register number.");
1414  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1415  RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1416 
1417  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1418  return 0;
1419 
1420  if (ShiftImm >= 4)
1421  return 0;
1422 
1423  static const unsigned OpcTable[2][2][2] = {
1424  { { AArch64::SUBWrx, AArch64::SUBXrx },
1425  { AArch64::ADDWrx, AArch64::ADDXrx } },
1426  { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1427  { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1428  };
1429  bool Is64Bit = RetVT == MVT::i64;
1430  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1431  const TargetRegisterClass *RC = nullptr;
1432  if (SetFlags)
1433  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1434  else
1435  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1436  unsigned ResultReg;
1437  if (WantResult)
1438  ResultReg = createResultReg(RC);
1439  else
1440  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1441 
1442  const MCInstrDesc &II = TII.get(Opc);
1443  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1444  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1445  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1446  .addReg(LHSReg, getKillRegState(LHSIsKill))
1447  .addReg(RHSReg, getKillRegState(RHSIsKill))
1448  .addImm(getArithExtendImm(ExtType, ShiftImm));
1449  return ResultReg;
1450 }
1451 
1452 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1453  Type *Ty = LHS->getType();
1454  EVT EVT = TLI.getValueType(DL, Ty, true);
1455  if (!EVT.isSimple())
1456  return false;
1457  MVT VT = EVT.getSimpleVT();
1458 
1459  switch (VT.SimpleTy) {
1460  default:
1461  return false;
1462  case MVT::i1:
1463  case MVT::i8:
1464  case MVT::i16:
1465  case MVT::i32:
1466  case MVT::i64:
1467  return emitICmp(VT, LHS, RHS, IsZExt);
1468  case MVT::f32:
1469  case MVT::f64:
1470  return emitFCmp(VT, LHS, RHS);
1471  }
1472 }
1473 
1474 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1475  bool IsZExt) {
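 // An integer compare is just a subtract that sets the flags and discards
 // the result.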
1476  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1477  IsZExt) != 0;
1478 }
1479 
1480 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1481  uint64_t Imm) {
1482  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1483  /*SetFlags=*/true, /*WantResult=*/false) != 0;
1484 }
1485 
1486 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1487  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1488  return false;
1489 
1490  // Check to see if the 2nd operand is a constant that we can encode directly
1491  // in the compare.
1492  bool UseImm = false;
1493  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1494  if (CFP->isZero() && !CFP->isNegative())
1495  UseImm = true;
1496 
1497  unsigned LHSReg = getRegForValue(LHS);
1498  if (!LHSReg)
1499  return false;
1500  bool LHSIsKill = hasTrivialKill(LHS);
1501 
1502  if (UseImm) {
1503  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1504  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1505  .addReg(LHSReg, getKillRegState(LHSIsKill));
1506  return true;
1507  }
1508 
1509  unsigned RHSReg = getRegForValue(RHS);
1510  if (!RHSReg)
1511  return false;
1512  bool RHSIsKill = hasTrivialKill(RHS);
1513 
1514  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1515  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1516  .addReg(LHSReg, getKillRegState(LHSIsKill))
1517  .addReg(RHSReg, getKillRegState(RHSIsKill));
1518  return true;
1519 }
1520 
1521 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1522  bool SetFlags, bool WantResult, bool IsZExt) {
1523  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1524  IsZExt);
1525 }
1526 
1527 /// This method is a wrapper to simplify add emission.
1528 ///
1529 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1530 /// that fails, then try to materialize the immediate into a register and use
1531 /// emitAddSub_rr instead.
1532 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1533  int64_t Imm) {
1534  unsigned ResultReg;
1535  if (Imm < 0)
1536  ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1537  else
1538  ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1539 
1540  if (ResultReg)
1541  return ResultReg;
1542 
1543  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1544  if (!CReg)
1545  return 0;
1546 
1547  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1548  return ResultReg;
1549 }
1550 
1551 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1552  bool SetFlags, bool WantResult, bool IsZExt) {
1553  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1554  IsZExt);
1555 }
1556 
1557 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1558  bool LHSIsKill, unsigned RHSReg,
1559  bool RHSIsKill, bool WantResult) {
1560  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1561  RHSIsKill, /*SetFlags=*/true, WantResult);
1562 }
1563 
1564 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1565  bool LHSIsKill, unsigned RHSReg,
1566  bool RHSIsKill,
1567  AArch64_AM::ShiftExtendType ShiftType,
1568  uint64_t ShiftImm, bool WantResult) {
1569  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1570  RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1571  WantResult);
1572 }
1573 
1574 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1575  const Value *LHS, const Value *RHS) {
1576  // Canonicalize immediates to the RHS first.
1577  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1578  std::swap(LHS, RHS);
1579 
1580  // Canonicalize mul by power-of-2 to the RHS.
1581  if (LHS->hasOneUse() && isValueAvailable(LHS))
1582  if (isMulPowOf2(LHS))
1583  std::swap(LHS, RHS);
1584 
1585  // Canonicalize shift immediate to the RHS.
1586  if (LHS->hasOneUse() && isValueAvailable(LHS))
1587  if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1588  if (isa<ConstantInt>(SI->getOperand(1)))
1589  std::swap(LHS, RHS);
1590 
1591  unsigned LHSReg = getRegForValue(LHS);
1592  if (!LHSReg)
1593  return 0;
1594  bool LHSIsKill = hasTrivialKill(LHS);
1595 
1596  unsigned ResultReg = 0;
1597  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1598  uint64_t Imm = C->getZExtValue();
1599  ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1600  }
1601  if (ResultReg)
1602  return ResultReg;
1603 
1604  // Check if the mul can be folded into the instruction.
1605  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1606  if (isMulPowOf2(RHS)) {
1607  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1608  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1609 
1610  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1611  if (C->getValue().isPowerOf2())
1612  std::swap(MulLHS, MulRHS);
1613 
1614  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1615  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1616 
1617  unsigned RHSReg = getRegForValue(MulLHS);
1618  if (!RHSReg)
1619  return 0;
1620  bool RHSIsKill = hasTrivialKill(MulLHS);
1621  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1622  RHSIsKill, ShiftVal);
1623  if (ResultReg)
1624  return ResultReg;
1625  }
1626  }
1627 
1628  // Check if the shift can be folded into the instruction.
1629  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1630  if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1631  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1632  uint64_t ShiftVal = C->getZExtValue();
1633  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1634  if (!RHSReg)
1635  return 0;
1636  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1637  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1638  RHSIsKill, ShiftVal);
1639  if (ResultReg)
1640  return ResultReg;
1641  }
1642  }
1643 
1644  unsigned RHSReg = getRegForValue(RHS);
1645  if (!RHSReg)
1646  return 0;
1647  bool RHSIsKill = hasTrivialKill(RHS);
1648 
1649  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1650  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1651  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1652  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1653  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1654  }
1655  return ResultReg;
1656 }
1657 
1658 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1659  unsigned LHSReg, bool LHSIsKill,
1660  uint64_t Imm) {
1661  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1662  "ISD nodes are not consecutive!");
1663  static const unsigned OpcTable[3][2] = {
1664  { AArch64::ANDWri, AArch64::ANDXri },
1665  { AArch64::ORRWri, AArch64::ORRXri },
1666  { AArch64::EORWri, AArch64::EORXri }
1667  };
1668  const TargetRegisterClass *RC;
1669  unsigned Opc;
1670  unsigned RegSize;
1671  switch (RetVT.SimpleTy) {
1672  default:
1673  return 0;
1674  case MVT::i1:
1675  case MVT::i8:
1676  case MVT::i16:
1677  case MVT::i32: {
1678  unsigned Idx = ISDOpc - ISD::AND;
1679  Opc = OpcTable[Idx][0];
1680  RC = &AArch64::GPR32spRegClass;
1681  RegSize = 32;
1682  break;
1683  }
1684  case MVT::i64:
1685  Opc = OpcTable[ISDOpc - ISD::AND][1];
1686  RC = &AArch64::GPR64spRegClass;
1687  RegSize = 64;
1688  break;
1689  }
1690 
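  // Note: AND/ORR/EOR immediates must be encodable as a repeating bitmask
  // pattern; if this constant is not, return 0 so the caller materializes it
  // in a register and uses the register-register form instead.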
1691  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1692  return 0;
1693 
1694  unsigned ResultReg =
1695  fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1696  AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1697  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1698  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1699  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1700  }
1701  return ResultReg;
1702 }
1703 
1704 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1705  unsigned LHSReg, bool LHSIsKill,
1706  unsigned RHSReg, bool RHSIsKill,
1707  uint64_t ShiftImm) {
1708  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1709  "ISD nodes are not consecutive!");
1710  static const unsigned OpcTable[3][2] = {
1711  { AArch64::ANDWrs, AArch64::ANDXrs },
1712  { AArch64::ORRWrs, AArch64::ORRXrs },
1713  { AArch64::EORWrs, AArch64::EORXrs }
1714  };
1715 
1716  // Don't deal with undefined shifts.
1717  if (ShiftImm >= RetVT.getSizeInBits())
1718  return 0;
1719 
1720  const TargetRegisterClass *RC;
1721  unsigned Opc;
1722  switch (RetVT.SimpleTy) {
1723  default:
1724  return 0;
1725  case MVT::i1:
1726  case MVT::i8:
1727  case MVT::i16:
1728  case MVT::i32:
1729  Opc = OpcTable[ISDOpc - ISD::AND][0];
1730  RC = &AArch64::GPR32RegClass;
1731  break;
1732  case MVT::i64:
1733  Opc = OpcTable[ISDOpc - ISD::AND][1];
1734  RC = &AArch64::GPR64RegClass;
1735  break;
1736  }
1737  unsigned ResultReg =
 1738  fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
 1739  AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
 1740  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1741  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1742  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1743  }
1744  return ResultReg;
1745 }
1746 
1747 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1748  uint64_t Imm) {
1749  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1750 }
1751 
1752 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1753  bool WantZExt, MachineMemOperand *MMO) {
1754  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1755  return 0;
1756 
1757  // Simplify this down to something we can handle.
1758  if (!simplifyAddress(Addr, VT))
1759  return 0;
1760 
1761  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1762  if (!ScaleFactor)
1763  llvm_unreachable("Unexpected value type.");
1764 
1765  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1766  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1767  bool UseScaled = true;
1768  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1769  UseScaled = false;
1770  ScaleFactor = 1;
1771  }
1772 
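  // Note: rows pair a 32-bit-result and a 64-bit-result opcode for each
  // addressing form: unscaled 9-bit offset (LDUR*), scaled 12-bit unsigned
  // offset (LDR*ui), 64-bit register offset (LDR*roX) and extended 32-bit
  // register offset (LDR*roW); columns select the access size.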
1773  static const unsigned GPOpcTable[2][8][4] = {
1774  // Sign-extend.
1775  { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1776  AArch64::LDURXi },
1777  { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1778  AArch64::LDURXi },
1779  { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1780  AArch64::LDRXui },
1781  { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1782  AArch64::LDRXui },
1783  { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1784  AArch64::LDRXroX },
1785  { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1786  AArch64::LDRXroX },
1787  { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1788  AArch64::LDRXroW },
1789  { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1790  AArch64::LDRXroW }
1791  },
1792  // Zero-extend.
1793  { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1794  AArch64::LDURXi },
1795  { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1796  AArch64::LDURXi },
1797  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1798  AArch64::LDRXui },
1799  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1800  AArch64::LDRXui },
1801  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1802  AArch64::LDRXroX },
1803  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1804  AArch64::LDRXroX },
1805  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1806  AArch64::LDRXroW },
1807  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1808  AArch64::LDRXroW }
1809  }
1810  };
1811 
1812  static const unsigned FPOpcTable[4][2] = {
1813  { AArch64::LDURSi, AArch64::LDURDi },
1814  { AArch64::LDRSui, AArch64::LDRDui },
1815  { AArch64::LDRSroX, AArch64::LDRDroX },
1816  { AArch64::LDRSroW, AArch64::LDRDroW }
1817  };
1818 
1819  unsigned Opc;
1820  const TargetRegisterClass *RC;
1821  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1822  Addr.getOffsetReg();
1823  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1824  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1825  Addr.getExtendType() == AArch64_AM::SXTW)
1826  Idx++;
1827 
1828  bool IsRet64Bit = RetVT == MVT::i64;
1829  switch (VT.SimpleTy) {
1830  default:
1831  llvm_unreachable("Unexpected value type.");
1832  case MVT::i1: // Intentional fall-through.
1833  case MVT::i8:
1834  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1835  RC = (IsRet64Bit && !WantZExt) ?
1836  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1837  break;
1838  case MVT::i16:
1839  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1840  RC = (IsRet64Bit && !WantZExt) ?
1841  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1842  break;
1843  case MVT::i32:
1844  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1845  RC = (IsRet64Bit && !WantZExt) ?
1846  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1847  break;
1848  case MVT::i64:
1849  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1850  RC = &AArch64::GPR64RegClass;
1851  break;
1852  case MVT::f32:
1853  Opc = FPOpcTable[Idx][0];
1854  RC = &AArch64::FPR32RegClass;
1855  break;
1856  case MVT::f64:
1857  Opc = FPOpcTable[Idx][1];
1858  RC = &AArch64::FPR64RegClass;
1859  break;
1860  }
1861 
1862  // Create the base instruction, then add the operands.
1863  unsigned ResultReg = createResultReg(RC);
1864  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1865  TII.get(Opc), ResultReg);
1866  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1867 
1868  // Loading an i1 requires special handling.
1869  if (VT == MVT::i1) {
1870  unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1871  assert(ANDReg && "Unexpected AND instruction emission failure.");
1872  ResultReg = ANDReg;
1873  }
1874 
1875  // For zero-extending loads to 64bit we emit a 32bit load and then convert
1876  // the 32bit reg to a 64bit reg.
1877  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1878  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1879  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1880  TII.get(AArch64::SUBREG_TO_REG), Reg64)
1881  .addImm(0)
1882  .addReg(ResultReg, getKillRegState(true))
1883  .addImm(AArch64::sub_32);
1884  ResultReg = Reg64;
1885  }
1886  return ResultReg;
1887 }
1888 
1889 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1890  MVT VT;
1891  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1892  return false;
1893 
1894  if (VT.isVector())
1895  return selectOperator(I, I->getOpcode());
1896 
1897  unsigned ResultReg;
1898  switch (I->getOpcode()) {
1899  default:
1900  llvm_unreachable("Unexpected instruction.");
1901  case Instruction::Add:
1902  ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1903  break;
1904  case Instruction::Sub:
1905  ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1906  break;
1907  }
1908  if (!ResultReg)
1909  return false;
1910 
1911  updateValueMap(I, ResultReg);
1912  return true;
1913 }
1914 
1915 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1916  MVT VT;
1917  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1918  return false;
1919 
1920  if (VT.isVector())
1921  return selectOperator(I, I->getOpcode());
1922 
1923  unsigned ResultReg;
1924  switch (I->getOpcode()) {
1925  default:
1926  llvm_unreachable("Unexpected instruction.");
1927  case Instruction::And:
1928  ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1929  break;
1930  case Instruction::Or:
1931  ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1932  break;
1933  case Instruction::Xor:
1934  ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1935  break;
1936  }
1937  if (!ResultReg)
1938  return false;
1939 
1940  updateValueMap(I, ResultReg);
1941  return true;
1942 }
1943 
1944 bool AArch64FastISel::selectLoad(const Instruction *I) {
1945  MVT VT;
1946  // Verify we have a legal type before going any further. Currently, we handle
1947  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1948  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1949  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1950  cast<LoadInst>(I)->isAtomic())
1951  return false;
1952 
1953  const Value *SV = I->getOperand(0);
1954  if (TLI.supportSwiftError()) {
1955  // Swifterror values can come from either a function parameter with
1956  // swifterror attribute or an alloca with swifterror attribute.
1957  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1958  if (Arg->hasSwiftErrorAttr())
1959  return false;
1960  }
1961 
1962  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1963  if (Alloca->isSwiftError())
1964  return false;
1965  }
1966  }
1967 
1968  // See if we can handle this address.
1969  Address Addr;
1970  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1971  return false;
1972 
1973  // Fold the following sign-/zero-extend into the load instruction.
1974  bool WantZExt = true;
1975  MVT RetVT = VT;
1976  const Value *IntExtVal = nullptr;
1977  if (I->hasOneUse()) {
1978  if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1979  if (isTypeSupported(ZE->getType(), RetVT))
1980  IntExtVal = ZE;
1981  else
1982  RetVT = VT;
1983  } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1984  if (isTypeSupported(SE->getType(), RetVT))
1985  IntExtVal = SE;
1986  else
1987  RetVT = VT;
1988  WantZExt = false;
1989  }
1990  }
1991 
1992  unsigned ResultReg =
1993  emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1994  if (!ResultReg)
1995  return false;
1996 
1997  // There are a few different cases we have to handle, because the load or the
 1998  // sign-/zero-extend might not be selected by FastISel if we fall back to
1999  // SelectionDAG. There is also an ordering issue when both instructions are in
2000  // different basic blocks.
2001  // 1.) The load instruction is selected by FastISel, but the integer extend
2002  // not. This usually happens when the integer extend is in a different
2003  // basic block and SelectionDAG took over for that basic block.
2004  // 2.) The load instruction is selected before the integer extend. This only
2005  // happens when the integer extend is in a different basic block.
2006  // 3.) The load instruction is selected by SelectionDAG and the integer extend
2007  // by FastISel. This happens if there are instructions between the load
2008  // and the integer extend that couldn't be selected by FastISel.
2009  if (IntExtVal) {
2010  // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2011  // could select it. Emit a copy to subreg if necessary. FastISel will remove
2012  // it when it selects the integer extend.
2013  unsigned Reg = lookUpRegForValue(IntExtVal);
2014  auto *MI = MRI.getUniqueVRegDef(Reg);
2015  if (!MI) {
2016  if (RetVT == MVT::i64 && VT <= MVT::i32) {
2017  if (WantZExt) {
2018  // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2019  MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2020  ResultReg = std::prev(I)->getOperand(0).getReg();
2021  removeDeadCode(I, std::next(I));
2022  } else
2023  ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2024  /*IsKill=*/true,
2025  AArch64::sub_32);
2026  }
2027  updateValueMap(I, ResultReg);
2028  return true;
2029  }
2030 
2031  // The integer extend has already been emitted - delete all the instructions
2032  // that have been emitted by the integer extend lowering code and use the
2033  // result from the load instruction directly.
2034  while (MI) {
2035  Reg = 0;
2036  for (auto &Opnd : MI->uses()) {
2037  if (Opnd.isReg()) {
2038  Reg = Opnd.getReg();
2039  break;
2040  }
2041  }
 2042  MachineBasicBlock::iterator I(MI);
 2043  removeDeadCode(I, std::next(I));
2044  MI = nullptr;
2045  if (Reg)
2046  MI = MRI.getUniqueVRegDef(Reg);
2047  }
2048  updateValueMap(IntExtVal, ResultReg);
2049  return true;
2050  }
2051 
2052  updateValueMap(I, ResultReg);
2053  return true;
2054 }
2055 
2056 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2057  unsigned AddrReg,
2058  MachineMemOperand *MMO) {
2059  unsigned Opc;
2060  switch (VT.SimpleTy) {
2061  default: return false;
2062  case MVT::i8: Opc = AArch64::STLRB; break;
2063  case MVT::i16: Opc = AArch64::STLRH; break;
2064  case MVT::i32: Opc = AArch64::STLRW; break;
2065  case MVT::i64: Opc = AArch64::STLRX; break;
2066  }
2067 
2068  const MCInstrDesc &II = TII.get(Opc);
2069  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2070  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2071  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2072  .addReg(SrcReg)
2073  .addReg(AddrReg)
2074  .addMemOperand(MMO);
2075  return true;
2076 }
2077 
2078 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2079  MachineMemOperand *MMO) {
2080  if (!TLI.allowsMisalignedMemoryAccesses(VT))
2081  return false;
2082 
2083  // Simplify this down to something we can handle.
2084  if (!simplifyAddress(Addr, VT))
2085  return false;
2086 
2087  unsigned ScaleFactor = getImplicitScaleFactor(VT);
2088  if (!ScaleFactor)
2089  llvm_unreachable("Unexpected value type.");
2090 
2091  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2092  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2093  bool UseScaled = true;
2094  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2095  UseScaled = false;
2096  ScaleFactor = 1;
2097  }
2098 
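  // Note: rows select the addressing form (unscaled 9-bit offset, scaled
  // 12-bit offset, 64-bit register offset, extended 32-bit register offset);
  // columns select the stored type (i8, i16, i32, i64, f32, f64).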
2099  static const unsigned OpcTable[4][6] = {
2100  { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2101  AArch64::STURSi, AArch64::STURDi },
2102  { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2103  AArch64::STRSui, AArch64::STRDui },
2104  { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2105  AArch64::STRSroX, AArch64::STRDroX },
2106  { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2107  AArch64::STRSroW, AArch64::STRDroW }
2108  };
2109 
2110  unsigned Opc;
2111  bool VTIsi1 = false;
2112  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2113  Addr.getOffsetReg();
2114  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2115  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2116  Addr.getExtendType() == AArch64_AM::SXTW)
2117  Idx++;
2118 
2119  switch (VT.SimpleTy) {
2120  default: llvm_unreachable("Unexpected value type.");
2121  case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
2122  case MVT::i8: Opc = OpcTable[Idx][0]; break;
2123  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2124  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2125  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2126  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2127  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2128  }
2129 
2130  // Storing an i1 requires special handling.
2131  if (VTIsi1 && SrcReg != AArch64::WZR) {
2132  unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2133  assert(ANDReg && "Unexpected AND instruction emission failure.");
2134  SrcReg = ANDReg;
2135  }
2136  // Create the base instruction, then add the operands.
2137  const MCInstrDesc &II = TII.get(Opc);
2138  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2139  MachineInstrBuilder MIB =
2140  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2141  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2142 
2143  return true;
2144 }
2145 
2146 bool AArch64FastISel::selectStore(const Instruction *I) {
2147  MVT VT;
2148  const Value *Op0 = I->getOperand(0);
2149  // Verify we have a legal type before going any further. Currently, we handle
2150  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2151  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2152  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2153  return false;
2154 
2155  const Value *PtrV = I->getOperand(1);
2156  if (TLI.supportSwiftError()) {
2157  // Swifterror values can come from either a function parameter with
2158  // swifterror attribute or an alloca with swifterror attribute.
2159  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2160  if (Arg->hasSwiftErrorAttr())
2161  return false;
2162  }
2163 
2164  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2165  if (Alloca->isSwiftError())
2166  return false;
2167  }
2168  }
2169 
2170  // Get the value to be stored into a register. Use the zero register directly
2171  // when possible to avoid an unnecessary copy and a wasted register.
2172  unsigned SrcReg = 0;
2173  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2174  if (CI->isZero())
2175  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2176  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2177  if (CF->isZero() && !CF->isNegative()) {
2178  VT = MVT::getIntegerVT(VT.getSizeInBits());
2179  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2180  }
2181  }
2182 
2183  if (!SrcReg)
2184  SrcReg = getRegForValue(Op0);
2185 
2186  if (!SrcReg)
2187  return false;
2188 
2189  auto *SI = cast<StoreInst>(I);
2190 
2191  // Try to emit a STLR for seq_cst/release.
2192  if (SI->isAtomic()) {
2193  AtomicOrdering Ord = SI->getOrdering();
2194  // The non-atomic instructions are sufficient for relaxed stores.
2195  if (isReleaseOrStronger(Ord)) {
2196  // The STLR addressing mode only supports a base reg; pass that directly.
2197  unsigned AddrReg = getRegForValue(PtrV);
2198  return emitStoreRelease(VT, SrcReg, AddrReg,
2199  createMachineMemOperandFor(I));
2200  }
2201  }
2202 
2203  // See if we can handle this address.
2204  Address Addr;
2205  if (!computeAddress(PtrV, Addr, Op0->getType()))
2206  return false;
2207 
2208  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2209  return false;
2210  return true;
2211 }
2212 
 2213 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
 2214  switch (Pred) {
2215  case CmpInst::FCMP_ONE:
2216  case CmpInst::FCMP_UEQ:
2217  default:
2218  // AL is our "false" for now. The other two need more compares.
2219  return AArch64CC::AL;
2220  case CmpInst::ICMP_EQ:
2221  case CmpInst::FCMP_OEQ:
2222  return AArch64CC::EQ;
2223  case CmpInst::ICMP_SGT:
2224  case CmpInst::FCMP_OGT:
2225  return AArch64CC::GT;
2226  case CmpInst::ICMP_SGE:
2227  case CmpInst::FCMP_OGE:
2228  return AArch64CC::GE;
2229  case CmpInst::ICMP_UGT:
2230  case CmpInst::FCMP_UGT:
2231  return AArch64CC::HI;
2232  case CmpInst::FCMP_OLT:
2233  return AArch64CC::MI;
2234  case CmpInst::ICMP_ULE:
2235  case CmpInst::FCMP_OLE:
2236  return AArch64CC::LS;
2237  case CmpInst::FCMP_ORD:
2238  return AArch64CC::VC;
2239  case CmpInst::FCMP_UNO:
2240  return AArch64CC::VS;
2241  case CmpInst::FCMP_UGE:
2242  return AArch64CC::PL;
2243  case CmpInst::ICMP_SLT:
2244  case CmpInst::FCMP_ULT:
2245  return AArch64CC::LT;
2246  case CmpInst::ICMP_SLE:
2247  case CmpInst::FCMP_ULE:
2248  return AArch64CC::LE;
2249  case CmpInst::FCMP_UNE:
2250  case CmpInst::ICMP_NE:
2251  return AArch64CC::NE;
2252  case CmpInst::ICMP_UGE:
2253  return AArch64CC::HS;
2254  case CmpInst::ICMP_ULT:
2255  return AArch64CC::LO;
2256  }
2257 }
2258 
2259 /// Try to emit a combined compare-and-branch instruction.
2260 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2261  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2262  // will not be produced, as they are conditional branch instructions that do
2263  // not set flags.
2264  if (FuncInfo.MF->getFunction().hasFnAttribute(
 2265  Attribute::SpeculativeLoadHardening))
 2266  return false;
2267 
2268  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2269  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2270  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2271 
2272  const Value *LHS = CI->getOperand(0);
2273  const Value *RHS = CI->getOperand(1);
2274 
2275  MVT VT;
2276  if (!isTypeSupported(LHS->getType(), VT))
2277  return false;
2278 
2279  unsigned BW = VT.getSizeInBits();
2280  if (BW > 64)
2281  return false;
2282 
2283  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2284  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2285 
2286  // Try to take advantage of fallthrough opportunities.
2287  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2288  std::swap(TBB, FBB);
2289  Predicate = CmpInst::getInversePredicate(Predicate);
2290  }
2291 
2292  int TestBit = -1;
2293  bool IsCmpNE;
2294  switch (Predicate) {
2295  default:
2296  return false;
2297  case CmpInst::ICMP_EQ:
2298  case CmpInst::ICMP_NE:
2299  if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2300  std::swap(LHS, RHS);
2301 
2302  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2303  return false;
2304 
2305  if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2306  if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2307  const Value *AndLHS = AI->getOperand(0);
2308  const Value *AndRHS = AI->getOperand(1);
2309 
2310  if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2311  if (C->getValue().isPowerOf2())
2312  std::swap(AndLHS, AndRHS);
2313 
2314  if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2315  if (C->getValue().isPowerOf2()) {
2316  TestBit = C->getValue().logBase2();
2317  LHS = AndLHS;
2318  }
2319  }
2320 
2321  if (VT == MVT::i1)
2322  TestBit = 0;
2323 
2324  IsCmpNE = Predicate == CmpInst::ICMP_NE;
2325  break;
2326  case CmpInst::ICMP_SLT:
2327  case CmpInst::ICMP_SGE:
2328  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2329  return false;
2330 
2331  TestBit = BW - 1;
2332  IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2333  break;
2334  case CmpInst::ICMP_SGT:
2335  case CmpInst::ICMP_SLE:
2336  if (!isa<ConstantInt>(RHS))
2337  return false;
2338 
2339  if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2340  return false;
2341 
2342  TestBit = BW - 1;
2343  IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2344  break;
2345  } // end switch
2346 
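  // Note: indexed as [IsBitTest][IsCmpNE][Is64Bit]: CB(N)Z compares the whole
  // register against zero, while TB(N)Z tests a single bit.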
2347  static const unsigned OpcTable[2][2][2] = {
2348  { {AArch64::CBZW, AArch64::CBZX },
2349  {AArch64::CBNZW, AArch64::CBNZX} },
2350  { {AArch64::TBZW, AArch64::TBZX },
2351  {AArch64::TBNZW, AArch64::TBNZX} }
2352  };
2353 
2354  bool IsBitTest = TestBit != -1;
2355  bool Is64Bit = BW == 64;
2356  if (TestBit < 32 && TestBit >= 0)
2357  Is64Bit = false;
2358 
2359  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2360  const MCInstrDesc &II = TII.get(Opc);
2361 
2362  unsigned SrcReg = getRegForValue(LHS);
2363  if (!SrcReg)
2364  return false;
2365  bool SrcIsKill = hasTrivialKill(LHS);
2366 
2367  if (BW == 64 && !Is64Bit)
2368  SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2369  AArch64::sub_32);
2370 
2371  if ((BW < 32) && !IsBitTest)
2372  SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2373 
2374  // Emit the combined compare and branch instruction.
2375  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2376  MachineInstrBuilder MIB =
2377  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2378  .addReg(SrcReg, getKillRegState(SrcIsKill));
2379  if (IsBitTest)
2380  MIB.addImm(TestBit);
2381  MIB.addMBB(TBB);
2382 
2383  finishCondBranch(BI->getParent(), TBB, FBB);
2384  return true;
2385 }
2386 
2387 bool AArch64FastISel::selectBranch(const Instruction *I) {
2388  const BranchInst *BI = cast<BranchInst>(I);
2389  if (BI->isUnconditional()) {
2390  MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2391  fastEmitBranch(MSucc, BI->getDebugLoc());
2392  return true;
2393  }
2394 
2395  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2396  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2397 
2398  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2399  if (CI->hasOneUse() && isValueAvailable(CI)) {
2400  // Try to optimize or fold the cmp.
2401  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2402  switch (Predicate) {
2403  default:
2404  break;
2405  case CmpInst::FCMP_FALSE:
2406  fastEmitBranch(FBB, DbgLoc);
2407  return true;
2408  case CmpInst::FCMP_TRUE:
2409  fastEmitBranch(TBB, DbgLoc);
2410  return true;
2411  }
2412 
2413  // Try to emit a combined compare-and-branch first.
2414  if (emitCompareAndBranch(BI))
2415  return true;
2416 
2417  // Try to take advantage of fallthrough opportunities.
2418  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2419  std::swap(TBB, FBB);
2420  Predicate = CmpInst::getInversePredicate(Predicate);
2421  }
2422 
2423  // Emit the cmp.
2424  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2425  return false;
2426 
2427  // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2428  // instruction.
2429  AArch64CC::CondCode CC = getCompareCC(Predicate);
 2430  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
 2431  switch (Predicate) {
2432  default:
2433  break;
2434  case CmpInst::FCMP_UEQ:
2435  ExtraCC = AArch64CC::EQ;
2436  CC = AArch64CC::VS;
2437  break;
2438  case CmpInst::FCMP_ONE:
2439  ExtraCC = AArch64CC::MI;
2440  CC = AArch64CC::GT;
2441  break;
2442  }
2443  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2444 
2445  // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2446  if (ExtraCC != AArch64CC::AL) {
2447  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2448  .addImm(ExtraCC)
2449  .addMBB(TBB);
2450  }
2451 
2452  // Emit the branch.
2453  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2454  .addImm(CC)
2455  .addMBB(TBB);
2456 
2457  finishCondBranch(BI->getParent(), TBB, FBB);
2458  return true;
2459  }
2460  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2461  uint64_t Imm = CI->getZExtValue();
2462  MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2463  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2464  .addMBB(Target);
2465 
2466  // Obtain the branch probability and add the target to the successor list.
2467  if (FuncInfo.BPI) {
2468  auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2469  BI->getParent(), Target->getBasicBlock());
2470  FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2471  } else
2472  FuncInfo.MBB->addSuccessorWithoutProb(Target);
2473  return true;
2474  } else {
 2475  AArch64CC::CondCode CC = AArch64CC::AL;
 2476  if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2477  // Fake request the condition, otherwise the intrinsic might be completely
2478  // optimized away.
2479  unsigned CondReg = getRegForValue(BI->getCondition());
2480  if (!CondReg)
2481  return false;
2482 
2483  // Emit the branch.
2484  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2485  .addImm(CC)
2486  .addMBB(TBB);
2487 
2488  finishCondBranch(BI->getParent(), TBB, FBB);
2489  return true;
2490  }
2491  }
2492 
2493  unsigned CondReg = getRegForValue(BI->getCondition());
2494  if (CondReg == 0)
2495  return false;
2496  bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2497 
2498  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2499  unsigned Opcode = AArch64::TBNZW;
2500  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2501  std::swap(TBB, FBB);
2502  Opcode = AArch64::TBZW;
2503  }
2504 
2505  const MCInstrDesc &II = TII.get(Opcode);
2506  unsigned ConstrainedCondReg
2507  = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2508  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2509  .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2510  .addImm(0)
2511  .addMBB(TBB);
2512 
2513  finishCondBranch(BI->getParent(), TBB, FBB);
2514  return true;
2515 }
2516 
2517 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2518  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2519  unsigned AddrReg = getRegForValue(BI->getOperand(0));
2520  if (AddrReg == 0)
2521  return false;
2522 
2523  // Emit the indirect branch.
2524  const MCInstrDesc &II = TII.get(AArch64::BR);
2525  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2526  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2527 
2528  // Make sure the CFG is up-to-date.
2529  for (auto *Succ : BI->successors())
2530  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2531 
2532  return true;
2533 }
2534 
2535 bool AArch64FastISel::selectCmp(const Instruction *I) {
2536  const CmpInst *CI = cast<CmpInst>(I);
2537 
2538  // Vectors of i1 are weird: bail out.
2539  if (CI->getType()->isVectorTy())
2540  return false;
2541 
2542  // Try to optimize or fold the cmp.
2543  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2544  unsigned ResultReg = 0;
2545  switch (Predicate) {
2546  default:
2547  break;
2548  case CmpInst::FCMP_FALSE:
2549  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2550  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2551  TII.get(TargetOpcode::COPY), ResultReg)
2552  .addReg(AArch64::WZR, getKillRegState(true));
2553  break;
2554  case CmpInst::FCMP_TRUE:
2555  ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2556  break;
2557  }
2558 
2559  if (ResultReg) {
2560  updateValueMap(I, ResultReg);
2561  return true;
2562  }
2563 
2564  // Emit the cmp.
2565  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2566  return false;
2567 
2568  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2569 
2570  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2571  // condition codes are inverted, because they are used by CSINC.
2572  static unsigned CondCodeTable[2][2] = {
 2573  { AArch64CC::NE, AArch64CC::VC },
 2574  { AArch64CC::PL, AArch64CC::LE }
 2575  };
2576  unsigned *CondCodes = nullptr;
2577  switch (Predicate) {
2578  default:
2579  break;
2580  case CmpInst::FCMP_UEQ:
2581  CondCodes = &CondCodeTable[0][0];
2582  break;
2583  case CmpInst::FCMP_ONE:
2584  CondCodes = &CondCodeTable[1][0];
2585  break;
2586  }
2587 
2588  if (CondCodes) {
2589  unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2590  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2591  TmpReg1)
2592  .addReg(AArch64::WZR, getKillRegState(true))
2593  .addReg(AArch64::WZR, getKillRegState(true))
2594  .addImm(CondCodes[0]);
2595  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2596  ResultReg)
2597  .addReg(TmpReg1, getKillRegState(true))
2598  .addReg(AArch64::WZR, getKillRegState(true))
2599  .addImm(CondCodes[1]);
2600 
2601  updateValueMap(I, ResultReg);
2602  return true;
2603  }
2604 
2605  // Now set a register based on the comparison.
2606  AArch64CC::CondCode CC = getCompareCC(Predicate);
2607  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2608  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2609  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2610  ResultReg)
2611  .addReg(AArch64::WZR, getKillRegState(true))
2612  .addReg(AArch64::WZR, getKillRegState(true))
2613  .addImm(invertedCC);
2614 
2615  updateValueMap(I, ResultReg);
2616  return true;
2617 }
2618 
2619 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2620 /// value.
2621 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2622  if (!SI->getType()->isIntegerTy(1))
2623  return false;
2624 
2625  const Value *Src1Val, *Src2Val;
2626  unsigned Opc = 0;
2627  bool NeedExtraOp = false;
2628  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2629  if (CI->isOne()) {
2630  Src1Val = SI->getCondition();
2631  Src2Val = SI->getFalseValue();
2632  Opc = AArch64::ORRWrr;
2633  } else {
2634  assert(CI->isZero());
2635  Src1Val = SI->getFalseValue();
2636  Src2Val = SI->getCondition();
2637  Opc = AArch64::BICWrr;
2638  }
2639  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2640  if (CI->isOne()) {
2641  Src1Val = SI->getCondition();
2642  Src2Val = SI->getTrueValue();
2643  Opc = AArch64::ORRWrr;
2644  NeedExtraOp = true;
2645  } else {
2646  assert(CI->isZero());
2647  Src1Val = SI->getCondition();
2648  Src2Val = SI->getTrueValue();
2649  Opc = AArch64::ANDWrr;
2650  }
2651  }
2652 
2653  if (!Opc)
2654  return false;
2655 
2656  unsigned Src1Reg = getRegForValue(Src1Val);
2657  if (!Src1Reg)
2658  return false;
2659  bool Src1IsKill = hasTrivialKill(Src1Val);
2660 
2661  unsigned Src2Reg = getRegForValue(Src2Val);
2662  if (!Src2Reg)
2663  return false;
2664  bool Src2IsKill = hasTrivialKill(Src2Val);
2665 
2666  if (NeedExtraOp) {
2667  Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2668  Src1IsKill = true;
2669  }
2670  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2671  Src1IsKill, Src2Reg, Src2IsKill);
2672  updateValueMap(SI, ResultReg);
2673  return true;
2674 }
2675 
2676 bool AArch64FastISel::selectSelect(const Instruction *I) {
2677  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2678  MVT VT;
2679  if (!isTypeSupported(I->getType(), VT))
2680  return false;
2681 
2682  unsigned Opc;
2683  const TargetRegisterClass *RC;
2684  switch (VT.SimpleTy) {
2685  default:
2686  return false;
2687  case MVT::i1:
2688  case MVT::i8:
2689  case MVT::i16:
2690  case MVT::i32:
2691  Opc = AArch64::CSELWr;
2692  RC = &AArch64::GPR32RegClass;
2693  break;
2694  case MVT::i64:
2695  Opc = AArch64::CSELXr;
2696  RC = &AArch64::GPR64RegClass;
2697  break;
2698  case MVT::f32:
2699  Opc = AArch64::FCSELSrrr;
2700  RC = &AArch64::FPR32RegClass;
2701  break;
2702  case MVT::f64:
2703  Opc = AArch64::FCSELDrrr;
2704  RC = &AArch64::FPR64RegClass;
2705  break;
2706  }
2707 
2708  const SelectInst *SI = cast<SelectInst>(I);
2709  const Value *Cond = SI->getCondition();
 2710  AArch64CC::CondCode CC = AArch64CC::AL;
 2711  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
 2712 
2713  if (optimizeSelect(SI))
2714  return true;
2715 
 2716  // Try to pick up the flags, so we don't have to emit another compare.
2717  if (foldXALUIntrinsic(CC, I, Cond)) {
2718  // Fake request the condition to force emission of the XALU intrinsic.
2719  unsigned CondReg = getRegForValue(Cond);
2720  if (!CondReg)
2721  return false;
2722  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2723  isValueAvailable(Cond)) {
2724  const auto *Cmp = cast<CmpInst>(Cond);
2725  // Try to optimize or fold the cmp.
2726  CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2727  const Value *FoldSelect = nullptr;
2728  switch (Predicate) {
2729  default:
2730  break;
2731  case CmpInst::FCMP_FALSE:
2732  FoldSelect = SI->getFalseValue();
2733  break;
2734  case CmpInst::FCMP_TRUE:
2735  FoldSelect = SI->getTrueValue();
2736  break;
2737  }
2738 
2739  if (FoldSelect) {
2740  unsigned SrcReg = getRegForValue(FoldSelect);
2741  if (!SrcReg)
2742  return false;
2743  unsigned UseReg = lookUpRegForValue(SI);
2744  if (UseReg)
2745  MRI.clearKillFlags(UseReg);
2746 
2747  updateValueMap(I, SrcReg);
2748  return true;
2749  }
2750 
2751  // Emit the cmp.
2752  if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2753  return false;
2754 
2755  // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2756  CC = getCompareCC(Predicate);
2757  switch (Predicate) {
2758  default:
2759  break;
2760  case CmpInst::FCMP_UEQ:
2761  ExtraCC = AArch64CC::EQ;
2762  CC = AArch64CC::VS;
2763  break;
2764  case CmpInst::FCMP_ONE:
2765  ExtraCC = AArch64CC::MI;
2766  CC = AArch64CC::GT;
2767  break;
2768  }
2769  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2770  } else {
2771  unsigned CondReg = getRegForValue(Cond);
2772  if (!CondReg)
2773  return false;
2774  bool CondIsKill = hasTrivialKill(Cond);
2775 
2776  const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2777  CondReg = constrainOperandRegClass(II, CondReg, 1);
2778 
2779  // Emit a TST instruction (ANDS wzr, reg, #imm).
2780  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2781  AArch64::WZR)
2782  .addReg(CondReg, getKillRegState(CondIsKill))
 2783  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 2784  }
2785 
2786  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2787  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2788 
2789  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2790  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2791 
2792  if (!Src1Reg || !Src2Reg)
2793  return false;
2794 
2795  if (ExtraCC != AArch64CC::AL) {
2796  Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2797  Src2IsKill, ExtraCC);
2798  Src2IsKill = true;
2799  }
2800  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2801  Src2IsKill, CC);
2802  updateValueMap(I, ResultReg);
2803  return true;
2804 }
2805 
2806 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2807  Value *V = I->getOperand(0);
2808  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2809  return false;
2810 
2811  unsigned Op = getRegForValue(V);
2812  if (Op == 0)
2813  return false;
2814 
2815  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2816  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2817  ResultReg).addReg(Op);
2818  updateValueMap(I, ResultReg);
2819  return true;
2820 }
2821 
2822 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2823  Value *V = I->getOperand(0);
2824  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2825  return false;
2826 
2827  unsigned Op = getRegForValue(V);
2828  if (Op == 0)
2829  return false;
2830 
2831  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2832  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2833  ResultReg).addReg(Op);
2834  updateValueMap(I, ResultReg);
2835  return true;
2836 }
2837 
2838 // FPToUI and FPToSI
2839 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2840  MVT DestVT;
2841  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2842  return false;
2843 
2844  unsigned SrcReg = getRegForValue(I->getOperand(0));
2845  if (SrcReg == 0)
2846  return false;
2847 
2848  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2849  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2850  return false;
2851 
2852  unsigned Opc;
2853  if (SrcVT == MVT::f64) {
2854  if (Signed)
2855  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2856  else
2857  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2858  } else {
2859  if (Signed)
2860  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2861  else
2862  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2863  }
2864  unsigned ResultReg = createResultReg(
2865  DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2866  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2867  .addReg(SrcReg);
2868  updateValueMap(I, ResultReg);
2869  return true;
2870 }
2871 
2872 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2873  MVT DestVT;
2874  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2875  return false;
2876  // Let regular ISEL handle FP16
2877  if (DestVT == MVT::f16)
2878  return false;
2879 
2880  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2881  "Unexpected value type.");
2882 
2883  unsigned SrcReg = getRegForValue(I->getOperand(0));
2884  if (!SrcReg)
2885  return false;
2886  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2887 
2888  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2889 
2890  // Handle sign-extension.
2891  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2892  SrcReg =
2893  emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2894  if (!SrcReg)
2895  return false;
2896  SrcIsKill = true;
2897  }
2898 
2899  unsigned Opc;
2900  if (SrcVT == MVT::i64) {
2901  if (Signed)
2902  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2903  else
2904  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2905  } else {
2906  if (Signed)
2907  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2908  else
2909  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2910  }
2911 
2912  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2913  SrcIsKill);
2914  updateValueMap(I, ResultReg);
2915  return true;
2916 }
2917 
2918 bool AArch64FastISel::fastLowerArguments() {
2919  if (!FuncInfo.CanLowerReturn)
2920  return false;
2921 
2922  const Function *F = FuncInfo.Fn;
2923  if (F->isVarArg())
2924  return false;
2925 
2926  CallingConv::ID CC = F->getCallingConv();
2927  if (CC != CallingConv::C && CC != CallingConv::Swift)
2928  return false;
2929 
2930  if (Subtarget->hasCustomCallingConv())
2931  return false;
2932 
2933  // Only handle simple cases of up to 8 GPR and FPR each.
2934  unsigned GPRCnt = 0;
2935  unsigned FPRCnt = 0;
2936  for (auto const &Arg : F->args()) {
2937  if (Arg.hasAttribute(Attribute::ByVal) ||
2938  Arg.hasAttribute(Attribute::InReg) ||
2939  Arg.hasAttribute(Attribute::StructRet) ||
2940  Arg.hasAttribute(Attribute::SwiftSelf) ||
2941  Arg.hasAttribute(Attribute::SwiftError) ||
2942  Arg.hasAttribute(Attribute::Nest))
2943  return false;
2944 
2945  Type *ArgTy = Arg.getType();
2946  if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2947  return false;
2948 
2949  EVT ArgVT = TLI.getValueType(DL, ArgTy);
2950  if (!ArgVT.isSimple())
2951  return false;
2952 
2953  MVT VT = ArgVT.getSimpleVT().SimpleTy;
2954  if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2955  return false;
2956 
2957  if (VT.isVector() &&
2958  (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2959  return false;
2960 
2961  if (VT >= MVT::i1 && VT <= MVT::i64)
2962  ++GPRCnt;
2963  else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2964  VT.is128BitVector())
2965  ++FPRCnt;
2966  else
2967  return false;
2968 
2969  if (GPRCnt > 8 || FPRCnt > 8)
2970  return false;
2971  }
2972 
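  // Note: argument registers by class: W/X for 32/64-bit integers, H/S/D/Q
  // for f16, f32, f64-or-64-bit-vector and 128-bit-vector arguments; at most
  // eight registers of each file are used.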
2973  static const MCPhysReg Registers[6][8] = {
2974  { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2975  AArch64::W5, AArch64::W6, AArch64::W7 },
2976  { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2977  AArch64::X5, AArch64::X6, AArch64::X7 },
2978  { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2979  AArch64::H5, AArch64::H6, AArch64::H7 },
2980  { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2981  AArch64::S5, AArch64::S6, AArch64::S7 },
2982  { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2983  AArch64::D5, AArch64::D6, AArch64::D7 },
2984  { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2985  AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2986  };
2987 
2988  unsigned GPRIdx = 0;
2989  unsigned FPRIdx = 0;
2990  for (auto const &Arg : F->args()) {
2991  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2992  unsigned SrcReg;
2993  const TargetRegisterClass *RC;
2994  if (VT >= MVT::i1 && VT <= MVT::i32) {
2995  SrcReg = Registers[0][GPRIdx++];
2996  RC = &AArch64::GPR32RegClass;
2997  VT = MVT::i32;
2998  } else if (VT == MVT::i64) {
2999  SrcReg = Registers[1][GPRIdx++];
3000  RC = &AArch64::GPR64RegClass;
3001  } else if (VT == MVT::f16) {
3002  SrcReg = Registers[2][FPRIdx++];
3003  RC = &AArch64::FPR16RegClass;
3004  } else if (VT == MVT::f32) {
3005  SrcReg = Registers[3][FPRIdx++];
3006  RC = &AArch64::FPR32RegClass;
3007  } else if ((VT == MVT::f64) || VT.is64BitVector()) {
3008  SrcReg = Registers[4][FPRIdx++];
3009  RC = &AArch64::FPR64RegClass;
3010  } else if (VT.is128BitVector()) {
3011  SrcReg = Registers[5][FPRIdx++];
3012  RC = &AArch64::FPR128RegClass;
3013  } else
3014  llvm_unreachable("Unexpected value type.");
3015 
3016  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3017  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3018  // Without this, EmitLiveInCopies may eliminate the livein if its only
3019  // use is a bitcast (which isn't turned into an instruction).
3020  unsigned ResultReg = createResultReg(RC);
3021  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3022  TII.get(TargetOpcode::COPY), ResultReg)
3023  .addReg(DstReg, getKillRegState(true));
3024  updateValueMap(&Arg, ResultReg);
3025  }
3026  return true;
3027 }
3028 
3029 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3030  SmallVectorImpl<MVT> &OutVTs,
3031  unsigned &NumBytes) {
3032  CallingConv::ID CC = CLI.CallConv;
 3033  SmallVector<CCValAssign, 16> ArgLocs;
 3034  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3035  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3036 
3037  // Get a count of how many bytes are to be pushed on the stack.
3038  NumBytes = CCInfo.getNextStackOffset();
3039 
3040  // Issue CALLSEQ_START
3041  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3042  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3043  .addImm(NumBytes).addImm(0);
3044 
3045  // Process the args.
3046  for (CCValAssign &VA : ArgLocs) {
3047  const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3048  MVT ArgVT = OutVTs[VA.getValNo()];
3049 
3050  unsigned ArgReg = getRegForValue(ArgVal);
3051  if (!ArgReg)
3052  return false;
3053 
3054  // Handle arg promotion: SExt, ZExt, AExt.
3055  switch (VA.getLocInfo()) {
3056  case CCValAssign::Full:
3057  break;
3058  case CCValAssign::SExt: {
3059  MVT DestVT = VA.getLocVT();
3060  MVT SrcVT = ArgVT;
3061  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3062  if (!ArgReg)
3063  return false;
3064  break;
3065  }
3066  case CCValAssign::AExt:
3067  // Intentional fall-through.
3068  case CCValAssign::ZExt: {
3069  MVT DestVT = VA.getLocVT();
3070  MVT SrcVT = ArgVT;
3071  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3072  if (!ArgReg)
3073  return false;
3074  break;
3075  }
3076  default:
3077  llvm_unreachable("Unknown arg promotion!");
3078  }
3079 
3080  // Now copy/store arg to correct locations.
3081  if (VA.isRegLoc() && !VA.needsCustom()) {
3082  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3083  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3084  CLI.OutRegs.push_back(VA.getLocReg());
3085  } else if (VA.needsCustom()) {
3086  // FIXME: Handle custom args.
3087  return false;
3088  } else {
3089  assert(VA.isMemLoc() && "Assuming store on stack.");
3090 
3091  // Don't emit stores for undef values.
3092  if (isa<UndefValue>(ArgVal))
3093  continue;
3094 
3095  // Need to store on the stack.
3096  unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3097 
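  // Note: on big-endian targets an argument smaller than 8 bytes is stored at
  // the high end of its 8-byte stack slot, so the offset is padded below by
  // 8 - ArgSize.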
3098  unsigned BEAlign = 0;
3099  if (ArgSize < 8 && !Subtarget->isLittleEndian())
3100  BEAlign = 8 - ArgSize;
3101 
3102  Address Addr;
3103  Addr.setKind(Address::RegBase);
3104  Addr.setReg(AArch64::SP);
3105  Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3106 
3107  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3108  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3109  MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3110  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3111 
3112  if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3113  return false;
3114  }
3115  }
3116  return true;
3117 }
3118 
3119 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3120  unsigned NumBytes) {
3121  CallingConv::ID CC = CLI.CallConv;
3122 
3123  // Issue CALLSEQ_END
3124  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3125  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3126  .addImm(NumBytes).addImm(0);
3127 
3128  // Now the return value.
3129  if (RetVT != MVT::isVoid) {
 3130  SmallVector<CCValAssign, 16> RVLocs;
 3131  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3132  CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3133 
3134  // Only handle a single return value.
3135  if (RVLocs.size() != 1)
3136  return false;
3137 
3138  // Copy all of the result registers out of their specified physreg.
3139  MVT CopyVT = RVLocs[0].getValVT();
3140 
3141  // TODO: Handle big-endian results
3142  if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3143  return false;
3144 
3145  unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3146  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3147  TII.get(TargetOpcode::COPY), ResultReg)
3148  .addReg(RVLocs[0].getLocReg());
3149  CLI.InRegs.push_back(RVLocs[0].getLocReg());
3150 
3151  CLI.ResultReg = ResultReg;
3152  CLI.NumResultRegs = 1;
3153  }
3154 
3155  return true;
3156 }
3157 
3158 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3159  CallingConv::ID CC = CLI.CallConv;
3160  bool IsTailCall = CLI.IsTailCall;
3161  bool IsVarArg = CLI.IsVarArg;
3162  const Value *Callee = CLI.Callee;
3163  MCSymbol *Symbol = CLI.Symbol;
3164 
3165  if (!Callee && !Symbol)
3166  return false;
3167 
3168  // Allow SelectionDAG isel to handle tail calls.
3169  if (IsTailCall)
3170  return false;
3171 
3172  CodeModel::Model CM = TM.getCodeModel();
3173  // Only support the small-addressing and large code models.
3174  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3175  return false;
3176 
3177  // FIXME: Add large code model support for ELF.
3178  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3179  return false;
3180 
3181  // Let SDISel handle vararg functions.
3182  if (IsVarArg)
3183  return false;
3184 
3185  // FIXME: Only handle *simple* calls for now.
3186  MVT RetVT;
3187  if (CLI.RetTy->isVoidTy())
3188  RetVT = MVT::isVoid;
3189  else if (!isTypeLegal(CLI.RetTy, RetVT))
3190  return false;
3191 
3192  for (auto Flag : CLI.OutFlags)
3193  if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3194  Flag.isSwiftSelf() || Flag.isSwiftError())
3195  return false;
3196 
3197  // Set up the argument vectors.
3198  SmallVector<MVT, 16> OutVTs;
3199  OutVTs.reserve(CLI.OutVals.size());
3200 
3201  for (auto *Val : CLI.OutVals) {
3202  MVT VT;
3203  if (!isTypeLegal(Val->getType(), VT) &&
3204  !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3205  return false;
3206 
3207  // We don't handle vector parameters yet.
3208  if (VT.isVector() || VT.getSizeInBits() > 64)
3209  return false;
3210 
3211  OutVTs.push_back(VT);
3212  }
3213 
3214  Address Addr;
3215  if (Callee && !computeCallAddress(Callee, Addr))
3216  return false;
3217 
3218  // Handle the arguments now that we've gotten them.
3219  unsigned NumBytes;
3220  if (!processCallArgs(CLI, OutVTs, NumBytes))
3221  return false;
3222 
3223  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3224  if (RegInfo->isAnyArgRegReserved(*MF))
3225  RegInfo->emitReservedArgRegCallError(*MF);
3226 
3227  // Issue the call.
3228  MachineInstrBuilder MIB;
3229  if (Subtarget->useSmallAddressing()) {
3230  const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3231  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3232  if (Symbol)
3233  MIB.addSym(Symbol, 0);
3234  else if (Addr.getGlobalValue())
3235  MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3236  else if (Addr.getReg()) {
3237  unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3238  MIB.addReg(Reg);
3239  } else
3240  return false;
3241  } else {
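  // Note: large code model (MachO): the callee address is formed with
  // ADRP + LDR through the GOT, or taken from an already computed global or
  // register, and the call then goes through BLR.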
3242  unsigned CallReg = 0;
3243  if (Symbol) {
3244  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3245  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3246  ADRPReg)
3247  .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3248 
3249  CallReg = createResultReg(&AArch64::GPR64RegClass);
3250  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3251  TII.get(AArch64::LDRXui), CallReg)
3252  .addReg(ADRPReg)
3253  .addSym(Symbol,
 3254  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 3255  } else if (Addr.getGlobalValue())
3256  CallReg = materializeGV(Addr.getGlobalValue());
3257  else if (Addr.getReg())
3258  CallReg = Addr.getReg();
3259 
3260  if (!CallReg)
3261  return false;
3262 
3263  const MCInstrDesc &II = TII.get(AArch64::BLR);
3264  CallReg = constrainOperandRegClass(II, CallReg, 0);
3265  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3266  }
3267 
3268  // Add implicit physical register uses to the call.
3269  for (auto Reg : CLI.OutRegs)
3270  MIB.addReg(Reg, RegState::Implicit);
3271 
3272  // Add a register mask with the call-preserved registers.
3273  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3274  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3275 
3276  CLI.Call = MIB;
3277 
3278  // Finish off the call including any return values.
3279  return finishCall(CLI, RetVT, NumBytes);
3280 }
3281 
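 // Note: inline the memcpy only when it lowers to a handful of loads and
 // stores: at most four naturally aligned accesses, or fewer than 32 bytes
 // when the alignment is unknown.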
3282 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3283  if (Alignment)
3284  return Len / Alignment <= 4;
3285  else
3286  return Len < 32;
3287 }
3288 
3289 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3290  uint64_t Len, unsigned Alignment) {
3291  // Make sure we don't bloat code by inlining very large memcpy's.
3292  if (!isMemCpySmall(Len, Alignment))
3293  return false;
3294 
3295  int64_t UnscaledOffset = 0;
3296  Address OrigDest = Dest;
3297  Address OrigSrc = Src;
3298 
3299  while (Len) {
3300  MVT VT;
3301  if (!Alignment || Alignment >= 8) {
3302  if (Len >= 8)
3303  VT = MVT::i64;
3304  else if (Len >= 4)
3305  VT = MVT::i32;
3306  else if (Len >= 2)
3307  VT = MVT::i16;
3308  else {
3309  VT = MVT::i8;
3310  }
3311  } else {
3312  // Bound based on alignment.
3313  if (Len >= 4 && Alignment == 4)
3314  VT = MVT::i32;
3315  else if (Len >= 2 && Alignment == 2)
3316  VT = MVT::i16;
3317  else {
3318  VT = MVT::i8;
3319  }
3320  }
3321 
3322  unsigned ResultReg = emitLoad(VT, VT, Src);
3323  if (!ResultReg)
3324  return false;
3325 
3326  if (!emitStore(VT, ResultReg, Dest))
3327  return false;
3328 
3329  int64_t Size = VT.getSizeInBits() / 8;
3330  Len -= Size;
3331  UnscaledOffset += Size;
3332 
3333  // We need to recompute the unscaled offset for each iteration.
3334  Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3335  Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3336  }
3337 
3338  return true;
3339 }
3340 
3341 /// Check if it is possible to fold the condition from the XALU intrinsic
3342 /// into the user. The condition code will only be updated on success.
3343 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3344  const Instruction *I,
3345  const Value *Cond) {
3346  if (!isa<ExtractValueInst>(Cond))
3347  return false;
3348 
3349  const auto *EV = cast<ExtractValueInst>(Cond);
3350  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3351  return false;
3352 
3353  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3354  MVT RetVT;
3355  const Function *Callee = II->getCalledFunction();
3356  Type *RetTy =
3357  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3358  if (!isTypeLegal(RetTy, RetVT))
3359  return false;
3360 
3361  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3362  return false;
3363 
3364  const Value *LHS = II->getArgOperand(0);
3365  const Value *RHS = II->getArgOperand(1);
3366 
3367  // Canonicalize immediate to the RHS.
3368  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3369  isCommutativeIntrinsic(II))
3370  std::swap(LHS, RHS);
3371 
3372  // Simplify multiplies.
3373  Intrinsic::ID IID = II->getIntrinsicID();
3374  switch (IID) {
3375  default:
3376  break;
3377  case Intrinsic::smul_with_overflow:
3378  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3379  if (C->getValue() == 2)
3380  IID = Intrinsic::sadd_with_overflow;
3381  break;
3382  case Intrinsic::umul_with_overflow:
3383  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3384  if (C->getValue() == 2)
3385  IID = Intrinsic::uadd_with_overflow;
3386  break;
3387  }
3388 
3389  AArch64CC::CondCode TmpCC;
3390  switch (IID) {
3391  default:
3392  return false;
3393  case Intrinsic::sadd_with_overflow:
3394  case Intrinsic::ssub_with_overflow:
3395  TmpCC = AArch64CC::VS;
3396  break;
3397  case Intrinsic::uadd_with_overflow:
3398  TmpCC = AArch64CC::HS;
3399  break;
3400  case Intrinsic::usub_with_overflow:
3401  TmpCC = AArch64CC::LO;
3402  break;
3403  case Intrinsic::smul_with_overflow:
3404  case Intrinsic::umul_with_overflow:
3405  TmpCC = AArch64CC::NE;
3406  break;
3407  }
3408 
3409  // Check if both instructions are in the same basic block.
3410  if (!isValueAvailable(II))
3411  return false;
3412 
3413  // Make sure nothing is in the way
3414  BasicBlock::const_iterator Start(I);
3415  BasicBlock::const_iterator End(II);
3416  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3417  // We only expect extractvalue instructions between the intrinsic and the
3418  // instruction to be selected.
3419  if (!isa<ExtractValueInst>(Itr))
3420  return false;
3421 
3422  // Check that the extractvalue operand comes from the intrinsic.
3423  const auto *EVI = cast<ExtractValueInst>(Itr);
3424  if (EVI->getAggregateOperand() != II)
3425  return false;
3426  }
3427 
3428  CC = TmpCC;
3429  return true;
3430 }
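// Typical IR pattern this folding targets (sketch):
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %val = extractvalue { i32, i1 } %res, 0
//   %ovf = extractvalue { i32, i1 } %res, 1
//   br i1 %ovf, label %overflow, label %cont
// The branch on %ovf can then reuse the flags set by the ADDS emitted for the
// intrinsic, using the condition code (here VS) computed above.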
3431 
3432 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3433  // FIXME: Handle more intrinsics.
3434  switch (II->getIntrinsicID()) {
3435  default: return false;
3436  case Intrinsic::frameaddress: {
3437  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3438  MFI.setFrameAddressIsTaken(true);
3439 
3440  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3441  unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3442  unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3443  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3444  TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3445  // Recursively load frame address
3446  // ldr x0, [fp]
3447  // ldr x0, [x0]
3448  // ldr x0, [x0]
3449  // ...
3450  unsigned DestReg;
3451  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3452  while (Depth--) {
3453  DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3454  SrcReg, /*IsKill=*/true, 0);
3455  assert(DestReg && "Unexpected LDR instruction emission failure.");
3456  SrcReg = DestReg;
3457  }
3458 
3459  updateValueMap(II, SrcReg);
3460  return true;
3461  }
3462  case Intrinsic::sponentry: {
3463  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3464 
3465  // SP = FP + Fixed Object + 16
3466  int FI = MFI.CreateFixedObject(4, 0, false);
3467  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3468  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3469  TII.get(AArch64::ADDXri), ResultReg)
3470  .addFrameIndex(FI)
3471  .addImm(0)
3472  .addImm(0);
3473 
3474  updateValueMap(II, ResultReg);
3475  return true;
3476  }
3477  case Intrinsic::memcpy:
3478  case Intrinsic::memmove: {
3479  const auto *MTI = cast<MemTransferInst>(II);
3480  // Don't handle volatile.
3481  if (MTI->isVolatile())
3482  return false;
3483 
3484  // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3485  // we would emit dead code because we don't currently handle memmoves.
3486  bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3487  if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3488  // Small memcpy's are common enough that we want to do them without a call
3489  // if possible.
3490  uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3491  unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3492  MTI->getSourceAlignment());
3493  if (isMemCpySmall(Len, Alignment)) {
3494  Address Dest, Src;
3495  if (!computeAddress(MTI->getRawDest(), Dest) ||
3496  !computeAddress(MTI->getRawSource(), Src))
3497  return false;
3498  if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3499  return true;
3500  }
3501  }
3502 
3503  if (!MTI->getLength()->getType()->isIntegerTy(64))
3504  return false;
3505 
3506  if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3507  // Fast instruction selection doesn't support the special
3508  // address spaces.
3509  return false;
3510 
3511  const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3512  return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3513  }
3514  case Intrinsic::memset: {
3515  const MemSetInst *MSI = cast<MemSetInst>(II);
3516  // Don't handle volatile.
3517  if (MSI->isVolatile())
3518  return false;
3519 
3520  if (!MSI->getLength()->getType()->isIntegerTy(64))
3521  return false;
3522 
3523  if (MSI->getDestAddressSpace() > 255)
3524  // Fast instruction selection doesn't support the special
3525  // address spaces.
3526  return false;
3527 
3528  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3529  }
3530  case Intrinsic::sin:
3531  case Intrinsic::cos:
3532  case Intrinsic::pow: {
3533  MVT RetVT;
3534  if (!isTypeLegal(II->getType(), RetVT))
3535  return false;
3536 
3537  if (RetVT != MVT::f32 && RetVT != MVT::f64)
3538  return false;
3539 
3540  static const RTLIB::Libcall LibCallTable[3][2] = {
3541  { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3542  { RTLIB::COS_F32, RTLIB::COS_F64 },
3543  { RTLIB::POW_F32, RTLIB::POW_F64 }
3544  };
3545  RTLIB::Libcall LC;
3546  bool Is64Bit = RetVT == MVT::f64;
3547  switch (II->getIntrinsicID()) {
3548  default:
3549  llvm_unreachable("Unexpected intrinsic.");
3550  case Intrinsic::sin:
3551  LC = LibCallTable[0][Is64Bit];
3552  break;
3553  case Intrinsic::cos:
3554  LC = LibCallTable[1][Is64Bit];
3555  break;
3556  case Intrinsic::pow:
3557  LC = LibCallTable[2][Is64Bit];
3558  break;
3559  }
3560 
3561  ArgListTy Args;
3562  Args.reserve(II->getNumArgOperands());
3563 
3564  // Populate the argument list.
3565  for (auto &Arg : II->arg_operands()) {
3566  ArgListEntry Entry;
3567  Entry.Val = Arg;
3568  Entry.Ty = Arg->getType();
3569  Args.push_back(Entry);
3570  }
3571 
3572  CallLoweringInfo CLI;
3573  MCContext &Ctx = MF->getContext();
3574  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3575  TLI.getLibcallName(LC), std::move(Args));
3576  if (!lowerCallTo(CLI))
3577  return false;
3578  updateValueMap(II, CLI.ResultReg);
3579  return true;
3580  }
3581  case Intrinsic::fabs: {
3582  MVT VT;
3583  if (!isTypeLegal(II->getType(), VT))
3584  return false;
3585 
3586  unsigned Opc;
3587  switch (VT.SimpleTy) {
3588  default:
3589  return false;
3590  case MVT::f32:
3591  Opc = AArch64::FABSSr;
3592  break;
3593  case MVT::f64:
3594  Opc = AArch64::FABSDr;
3595  break;
3596  }
3597  unsigned SrcReg = getRegForValue(II->getOperand(0));
3598  if (!SrcReg)
3599  return false;
3600  bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3601  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3602  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3603  .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3604  updateValueMap(II, ResultReg);
3605  return true;
3606  }
3607  case Intrinsic::trap:
3608  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3609  .addImm(1);
3610  return true;
3611 
3612  case Intrinsic::sqrt: {
3613  Type *RetTy = II->getCalledFunction()->getReturnType();
3614 
3615  MVT VT;
3616  if (!isTypeLegal(RetTy, VT))
3617  return false;
3618 
3619  unsigned Op0Reg = getRegForValue(II->getOperand(0));
3620  if (!Op0Reg)
3621  return false;
3622  bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3623 
3624  unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3625  if (!ResultReg)
3626  return false;
3627 
3628  updateValueMap(II, ResultReg);
3629  return true;
3630  }
3631  case Intrinsic::sadd_with_overflow:
3632  case Intrinsic::uadd_with_overflow:
3633  case Intrinsic::ssub_with_overflow:
3634  case Intrinsic::usub_with_overflow:
3635  case Intrinsic::smul_with_overflow:
3636  case Intrinsic::umul_with_overflow: {
3637  // This implements the basic lowering of the xalu with overflow intrinsics.
3638  const Function *Callee = II->getCalledFunction();
3639  auto *Ty = cast<StructType>(Callee->getReturnType());
3640  Type *RetTy = Ty->getTypeAtIndex(0U);
3641 
3642  MVT VT;
3643  if (!isTypeLegal(RetTy, VT))
3644  return false;
3645 
3646  if (VT != MVT::i32 && VT != MVT::i64)
3647  return false;
3648 
3649  const Value *LHS = II->getArgOperand(0);
3650  const Value *RHS = II->getArgOperand(1);
3651  // Canonicalize immediate to the RHS.
3652  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3653  isCommutativeIntrinsic(II))
3654  std::swap(LHS, RHS);
3655 
3656  // Simplify multiplies.
3657  Intrinsic::ID IID = II->getIntrinsicID();
3658  switch (IID) {
3659  default:
3660  break;
3661  case Intrinsic::smul_with_overflow:
3662  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3663  if (C->getValue() == 2) {
3664  IID = Intrinsic::sadd_with_overflow;
3665  RHS = LHS;
3666  }
3667  break;
3668  case Intrinsic::umul_with_overflow:
3669  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3670  if (C->getValue() == 2) {
3671  IID = Intrinsic::uadd_with_overflow;
3672  RHS = LHS;
3673  }
3674  break;
3675  }
3676 
3677  unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3678  AArch64CC::CondCode CC = AArch64CC::Invalid;
3679  switch (IID) {
3680  default: llvm_unreachable("Unexpected intrinsic!");
3681  case Intrinsic::sadd_with_overflow:
3682  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3683  CC = AArch64CC::VS;
3684  break;
3685  case Intrinsic::uadd_with_overflow:
3686  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3687  CC = AArch64CC::HS;
3688  break;
3689  case Intrinsic::ssub_with_overflow:
3690  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3691  CC = AArch64CC::VS;
3692  break;
3693  case Intrinsic::usub_with_overflow:
3694  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3695  CC = AArch64CC::LO;
3696  break;
3697  case Intrinsic::smul_with_overflow: {
3698  CC = AArch64CC::NE;
3699  unsigned LHSReg = getRegForValue(LHS);
3700  if (!LHSReg)
3701  return false;
3702  bool LHSIsKill = hasTrivialKill(LHS);
3703 
3704  unsigned RHSReg = getRegForValue(RHS);
3705  if (!RHSReg)
3706  return false;
3707  bool RHSIsKill = hasTrivialKill(RHS);
3708 
3709  if (VT == MVT::i32) {
3710  MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3711  unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3712  /*IsKill=*/false, 32);
3713  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3714  AArch64::sub_32);
3715  ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3716  AArch64::sub_32);
3717  emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3718  AArch64_AM::ASR, 31, /*WantResult=*/false);
3719  } else {
3720  assert(VT == MVT::i64 && "Unexpected value type.");
3721  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3722  // reused in the next instruction.
3723  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3724  /*IsKill=*/false);
3725  unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3726  RHSReg, RHSIsKill);
3727  emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3728  AArch64_AM::ASR, 63, /*WantResult=*/false);
3729  }
3730  break;
3731  }
3732  case Intrinsic::umul_with_overflow: {
3733  CC = AArch64CC::NE;
3734  unsigned LHSReg = getRegForValue(LHS);
3735  if (!LHSReg)
3736  return false;
3737  bool LHSIsKill = hasTrivialKill(LHS);
3738 
3739  unsigned RHSReg = getRegForValue(RHS);
3740  if (!RHSReg)
3741  return false;
3742  bool RHSIsKill = hasTrivialKill(RHS);
3743 
3744  if (VT == MVT::i32) {
3745  MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3746  emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3747  /*IsKill=*/false, AArch64_AM::LSR, 32,
3748  /*WantResult=*/false);
3749  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3750  AArch64::sub_32);
3751  } else {
3752  assert(VT == MVT::i64 && "Unexpected value type.");
3753  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3754  // reused in the next instruction.
3755  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3756  /*IsKill=*/false);
3757  unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3758  RHSReg, RHSIsKill);
3759  emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3760  /*IsKill=*/false, /*WantResult=*/false);
3761  }
3762  break;
3763  }
3764  }
3765 
3766  if (MulReg) {
3767  ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3768  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3769  TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3770  }
3771 
3772  if (!ResultReg1)
3773  return false;
3774 
3775  ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3776  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3777  /*IsKill=*/true, getInvertedCondCode(CC));
3778  (void)ResultReg2;
3779  assert((ResultReg1 + 1) == ResultReg2 &&
3780  "Nonconsecutive result registers.");
3781  updateValueMap(II, ResultReg1, 2);
3782  return true;
3783  }
3784  }
3785  return false;
3786 }
3787 
3788 bool AArch64FastISel::selectRet(const Instruction *I) {
3789  const ReturnInst *Ret = cast<ReturnInst>(I);
3790  const Function &F = *I->getParent()->getParent();
3791 
3792  if (!FuncInfo.CanLowerReturn)
3793  return false;
3794 
3795  if (F.isVarArg())
3796  return false;
3797 
3798  if (TLI.supportSwiftError() &&
3799  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3800  return false;
3801 
3802  if (TLI.supportSplitCSR(FuncInfo.MF))
3803  return false;
3804 
3805  // Build a list of return value registers.
3806  SmallVector<unsigned, 4> RetRegs;
3807 
3808  if (Ret->getNumOperands() > 0) {
3809  CallingConv::ID CC = F.getCallingConv();
3810  SmallVector<ISD::OutputArg, 4> Outs;
3811  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3812 
3813  // Analyze operands of the call, assigning locations to each operand.
3814  SmallVector<CCValAssign, 16> ValLocs;
3815  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3816  CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3817  : RetCC_AArch64_AAPCS;
3818  CCInfo.AnalyzeReturn(Outs, RetCC);
3819 
3820  // Only handle a single return value for now.
3821  if (ValLocs.size() != 1)
3822  return false;
3823 
3824  CCValAssign &VA = ValLocs[0];
3825  const Value *RV = Ret->getOperand(0);
3826 
3827  // Don't bother handling odd stuff for now.
3828  if ((VA.getLocInfo() != CCValAssign::Full) &&
3829  (VA.getLocInfo() != CCValAssign::BCvt))
3830  return false;
3831 
3832  // Only handle register returns for now.
3833  if (!VA.isRegLoc())
3834  return false;
3835 
3836  unsigned Reg = getRegForValue(RV);
3837  if (Reg == 0)
3838  return false;
3839 
3840  unsigned SrcReg = Reg + VA.getValNo();
3841  unsigned DestReg = VA.getLocReg();
3842  // Avoid a cross-class copy. This is very unlikely.
3843  if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3844  return false;
3845 
3846  EVT RVEVT = TLI.getValueType(DL, RV->getType());
3847  if (!RVEVT.isSimple())
3848  return false;
3849 
3850  // Vectors (of > 1 lane) in big endian need tricky handling.
3851  if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3852  !Subtarget->isLittleEndian())
3853  return false;
3854 
3855  MVT RVVT = RVEVT.getSimpleVT();
3856  if (RVVT == MVT::f128)
3857  return false;
3858 
3859  MVT DestVT = VA.getValVT();
3860  // Special handling for extended integers.
3861  if (RVVT != DestVT) {
3862  if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3863  return false;
3864 
3865  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3866  return false;
3867 
3868  bool IsZExt = Outs[0].Flags.isZExt();
3869  SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3870  if (SrcReg == 0)
3871  return false;
3872  }
3873 
3874  // Make the copy.
3875  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3876  TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3877 
3878  // Add register to return instruction.
3879  RetRegs.push_back(VA.getLocReg());
3880  }
3881 
3882  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3883  TII.get(AArch64::RET_ReallyLR));
3884  for (unsigned RetReg : RetRegs)
3885  MIB.addReg(RetReg, RegState::Implicit);
3886  return true;
3887 }
3888 
3889 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3890  Type *DestTy = I->getType();
3891  Value *Op = I->getOperand(0);
3892  Type *SrcTy = Op->getType();
3893 
3894  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3895  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3896  if (!SrcEVT.isSimple())
3897  return false;
3898  if (!DestEVT.isSimple())
3899  return false;
3900 
3901  MVT SrcVT = SrcEVT.getSimpleVT();
3902  MVT DestVT = DestEVT.getSimpleVT();
3903 
3904  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3905  SrcVT != MVT::i8)
3906  return false;
3907  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3908  DestVT != MVT::i1)
3909  return false;
3910 
3911  unsigned SrcReg = getRegForValue(Op);
3912  if (!SrcReg)
3913  return false;
3914  bool SrcIsKill = hasTrivialKill(Op);
3915 
3916  // If we're truncating from i64 to a smaller non-legal type then generate an
3917  // AND. Otherwise, we know the high bits are undefined and a truncate only
3918  // generates a COPY. We cannot also mark the source register as the result
3919  // register, because this can incorrectly transfer the kill flag onto the
3920  // source register.
3921  unsigned ResultReg;
3922  if (SrcVT == MVT::i64) {
3923  uint64_t Mask = 0;
3924  switch (DestVT.SimpleTy) {
3925  default:
3926  // Trunc i64 to i32 is handled by the target-independent fast-isel.
3927  return false;
3928  case MVT::i1:
3929  Mask = 0x1;
3930  break;
3931  case MVT::i8:
3932  Mask = 0xff;
3933  break;
3934  case MVT::i16:
3935  Mask = 0xffff;
3936  break;
3937  }
3938  // Issue an extract_subreg to get the lower 32-bits.
3939  unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3940  AArch64::sub_32);
3941  // Create the AND instruction which performs the actual truncation.
3942  ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3943  assert(ResultReg && "Unexpected AND instruction emission failure.");
3944  } else {
3945  ResultReg = createResultReg(&AArch64::GPR32RegClass);
3946  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3947  TII.get(TargetOpcode::COPY), ResultReg)
3948  .addReg(SrcReg, getKillRegState(SrcIsKill));
3949  }
3950 
3951  updateValueMap(I, ResultReg);
3952  return true;
3953 }
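// Example of the i64 path above (sketch): for "%t = trunc i64 %x to i8" this
// extracts the low 32 bits via sub_32 and then emits an ANDWri with mask
// 0xff; truncating from i32 or smaller only needs the COPY in the else branch.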
3954 
3955 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3956  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3957  DestVT == MVT::i64) &&
3958  "Unexpected value type.");
3959  // Handle i8 and i16 as i32.
3960  if (DestVT == MVT::i8 || DestVT == MVT::i16)
3961  DestVT = MVT::i32;
3962 
3963  if (IsZExt) {
3964  unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3965  assert(ResultReg && "Unexpected AND instruction emission failure.");
3966  if (DestVT == MVT::i64) {
3967  // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3968  // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3969  unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3970  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3971  TII.get(AArch64::SUBREG_TO_REG), Reg64)
3972  .addImm(0)
3973  .addReg(ResultReg)
3974  .addImm(AArch64::sub_32);
3975  ResultReg = Reg64;
3976  }
3977  return ResultReg;
3978  } else {
3979  if (DestVT == MVT::i64) {
3980  // FIXME: We're SExt i1 to i64.
3981  return 0;
3982  }
3983  return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3984  /*TODO:IsKill=*/false, 0, 0);
3985  }
3986 }
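// Example (sketch): zero-extending an i1 to i64 emits an ANDWri with mask 1
// followed by a SUBREG_TO_REG so the 32-bit result can be used as a 64-bit
// register; sign-extending an i1 to i32 uses SBFMWri with immr = imms = 0.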
3987 
3988 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3989  unsigned Op1, bool Op1IsKill) {
3990  unsigned Opc, ZReg;
3991  switch (RetVT.SimpleTy) {
3992  default: return 0;
3993  case MVT::i8:
3994  case MVT::i16:
3995  case MVT::i32:
3996  RetVT = MVT::i32;
3997  Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3998  case MVT::i64:
3999  Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4000  }
4001 
4002  const TargetRegisterClass *RC =
4003  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4004  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
4005  ZReg, /*IsKill=*/true);
4006 }
4007 
4008 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4009  unsigned Op1, bool Op1IsKill) {
4010  if (RetVT != MVT::i64)
4011  return 0;
4012 
4013  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4014  Op0, Op0IsKill, Op1, Op1IsKill,
4015  AArch64::XZR, /*IsKill=*/true);
4016 }
4017 
4018 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4019  unsigned Op1, bool Op1IsKill) {
4020  if (RetVT != MVT::i64)
4021  return 0;
4022 
4023  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4024  Op0, Op0IsKill, Op1, Op1IsKill,
4025  AArch64::XZR, /*IsKill=*/true);
4026 }
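// Both helpers above rely on the multiply-add forms: SMADDL/UMADDL compute
// Xd = Xa + (Wn * Wm) with a sign-/zero-extended 32x32->64 multiply, so
// passing XZR as the addend yields the plain widening multiply used by the
// overflow checks in fastLowerIntrinsicCall.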
4027 
4028 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4029  unsigned Op1Reg, bool Op1IsKill) {
4030  unsigned Opc = 0;
4031  bool NeedTrunc = false;
4032  uint64_t Mask = 0;
4033  switch (RetVT.SimpleTy) {
4034  default: return 0;
4035  case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4036  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4037  case MVT::i32: Opc = AArch64::LSLVWr; break;
4038  case MVT::i64: Opc = AArch64::LSLVXr; break;
4039  }
4040 
4041  const TargetRegisterClass *RC =
4042  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4043  if (NeedTrunc) {
4044  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4045  Op1IsKill = true;
4046  }
4047  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4048  Op1IsKill);
4049  if (NeedTrunc)
4050  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4051  return ResultReg;
4052 }
4053 
4054 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4055  bool Op0IsKill, uint64_t Shift,
4056  bool IsZExt) {
4057  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4058  "Unexpected source/return type pair.");
4059  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4060  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4061  "Unexpected source value type.");
4062  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4063  RetVT == MVT::i64) && "Unexpected return value type.");
4064 
4065  bool Is64Bit = (RetVT == MVT::i64);
4066  unsigned RegSize = Is64Bit ? 64 : 32;
4067  unsigned DstBits = RetVT.getSizeInBits();
4068  unsigned SrcBits = SrcVT.getSizeInBits();
4069  const TargetRegisterClass *RC =
4070  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4071 
4072  // Just emit a copy for "zero" shifts.
4073  if (Shift == 0) {
4074  if (RetVT == SrcVT) {
4075  unsigned ResultReg = createResultReg(RC);
4076  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4077  TII.get(TargetOpcode::COPY), ResultReg)
4078  .addReg(Op0, getKillRegState(Op0IsKill));
4079  return ResultReg;
4080  } else
4081  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4082  }
4083 
4084  // Don't deal with undefined shifts.
4085  if (Shift >= DstBits)
4086  return 0;
4087 
4088  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4089  // {S|U}BFM Wd, Wn, #r, #s
4090  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4091 
4092  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4093  // %2 = shl i16 %1, 4
4094  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4095  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4096  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4097  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4098 
4099  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4100  // %2 = shl i16 %1, 8
4101  // Wd<32+7-24,32-24> = Wn<7:0>
4102  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4103  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4104  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4105 
4106  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4107  // %2 = shl i16 %1, 12
4108  // Wd<32+3-20,32-20> = Wn<3:0>
4109  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4110  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4111  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4112 
4113  unsigned ImmR = RegSize - Shift;
4114  // Limit the width to the length of the source type.
4115  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4116  static const unsigned OpcTable[2][2] = {
4117  {AArch64::SBFMWri, AArch64::SBFMXri},
4118  {AArch64::UBFMWri, AArch64::UBFMXri}
4119  };
4120  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4121  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4122  unsigned TmpReg = MRI.createVirtualRegister(RC);
4123  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4124  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4125  .addImm(0)
4126  .addReg(Op0, getKillRegState(Op0IsKill))
4127  .addImm(AArch64::sub_32);
4128  Op0 = TmpReg;
4129  Op0IsKill = true;
4130  }
4131  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4132 }
4133 
4134 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4135  unsigned Op1Reg, bool Op1IsKill) {
4136  unsigned Opc = 0;
4137  bool NeedTrunc = false;
4138  uint64_t Mask = 0;
4139  switch (RetVT.SimpleTy) {
4140  default: return 0;
4141  case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4142  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4143  case MVT::i32: Opc = AArch64::LSRVWr; break;
4144  case MVT::i64: Opc = AArch64::LSRVXr; break;
4145  }
4146 
4147  const TargetRegisterClass *RC =
4148  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4149  if (NeedTrunc) {
4150  Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4151  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4152  Op0IsKill = Op1IsKill = true;
4153  }
4154  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4155  Op1IsKill);
4156  if (NeedTrunc)
4157  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4158  return ResultReg;
4159 }
4160 
4161 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4162  bool Op0IsKill, uint64_t Shift,
4163  bool IsZExt) {
4164  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4165  "Unexpected source/return type pair.");
4166  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4167  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4168  "Unexpected source value type.");
4169  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4170  RetVT == MVT::i64) && "Unexpected return value type.");
4171 
4172  bool Is64Bit = (RetVT == MVT::i64);
4173  unsigned RegSize = Is64Bit ? 64 : 32;
4174  unsigned DstBits = RetVT.getSizeInBits();
4175  unsigned SrcBits = SrcVT.getSizeInBits();
4176  const TargetRegisterClass *RC =
4177  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4178 
4179  // Just emit a copy for "zero" shifts.
4180  if (Shift == 0) {
4181  if (RetVT == SrcVT) {
4182  unsigned ResultReg = createResultReg(RC);
4183  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4184  TII.get(TargetOpcode::COPY), ResultReg)
4185  .addReg(Op0, getKillRegState(Op0IsKill));
4186  return ResultReg;
4187  } else
4188  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4189  }
4190 
4191  // Don't deal with undefined shifts.
4192  if (Shift >= DstBits)
4193  return 0;
4194 
4195  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4196  // {S|U}BFM Wd, Wn, #r, #s
4197  // Wd<s-r:0> = Wn<s:r> when r <= s
4198 
4199  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4200  // %2 = lshr i16 %1, 4
4201  // Wd<7-4:0> = Wn<7:4>
4202  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4203  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4204  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4205 
4206  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4207  // %2 = lshr i16 %1, 8
4208  // Wd<7-7,0> = Wn<7:7>
4209  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4210  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4211  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4212 
4213  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4214  // %2 = lshr i16 %1, 12
4215  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4216  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4217  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4218  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4219 
4220  if (Shift >= SrcBits && IsZExt)
4221  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4222 
4223  // It is not possible to fold a sign-extend into the LShr instruction. In this
4224  // case emit a sign-extend.
4225  if (!IsZExt) {
4226  Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4227  if (!Op0)
4228  return 0;
4229  Op0IsKill = true;
4230  SrcVT = RetVT;
4231  SrcBits = SrcVT.getSizeInBits();
4232  IsZExt = true;
4233  }
4234 
4235  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4236  unsigned ImmS = SrcBits - 1;
4237  static const unsigned OpcTable[2][2] = {
4238  {AArch64::SBFMWri, AArch64::SBFMXri},
4239  {AArch64::UBFMWri, AArch64::UBFMXri}
4240  };
4241  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4242  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4243  unsigned TmpReg = MRI.createVirtualRegister(RC);
4244  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4245  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4246  .addImm(0)
4247  .addReg(Op0, getKillRegState(Op0IsKill))
4248  .addImm(AArch64::sub_32);
4249  Op0 = TmpReg;
4250  Op0IsKill = true;
4251  }
4252  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4253 }
4254 
4255 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4256  unsigned Op1Reg, bool Op1IsKill) {
4257  unsigned Opc = 0;
4258  bool NeedTrunc = false;
4259  uint64_t Mask = 0;
4260  switch (RetVT.SimpleTy) {
4261  default: return 0;
4262  case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4263  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4264  case MVT::i32: Opc = AArch64::ASRVWr; break;
4265  case MVT::i64: Opc = AArch64::ASRVXr; break;
4266  }
4267 
4268  const TargetRegisterClass *RC =
4269  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4270  if (NeedTrunc) {
4271  Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4272  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4273  Op0IsKill = Op1IsKill = true;
4274  }
4275  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4276  Op1IsKill);
4277  if (NeedTrunc)
4278  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4279  return ResultReg;
4280 }
4281 
4282 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4283  bool Op0IsKill, uint64_t Shift,
4284  bool IsZExt) {
4285  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4286  "Unexpected source/return type pair.");
4287  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4288  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4289  "Unexpected source value type.");
4290  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4291  RetVT == MVT::i64) && "Unexpected return value type.");
4292 
4293  bool Is64Bit = (RetVT == MVT::i64);
4294  unsigned RegSize = Is64Bit ? 64 : 32;
4295  unsigned DstBits = RetVT.getSizeInBits();
4296  unsigned SrcBits = SrcVT.getSizeInBits();
4297  const TargetRegisterClass *RC =
4298  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4299 
4300  // Just emit a copy for "zero" shifts.
4301  if (Shift == 0) {
4302  if (RetVT == SrcVT) {
4303  unsigned ResultReg = createResultReg(RC);
4304  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4305  TII.get(TargetOpcode::COPY), ResultReg)
4306  .addReg(Op0, getKillRegState(Op0IsKill));
4307  return ResultReg;
4308  } else
4309  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4310  }
4311 
4312  // Don't deal with undefined shifts.
4313  if (Shift >= DstBits)
4314  return 0;
4315 
4316  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4317  // {S|U}BFM Wd, Wn, #r, #s
4318  // Wd<s-r:0> = Wn<s:r> when r <= s
4319 
4320  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4321  // %2 = ashr i16 %1, 4
4322  // Wd<7-4:0> = Wn<7:4>
4323  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4324  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4325  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4326 
4327  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4328  // %2 = ashr i16 %1, 8
4329  // Wd<7-7,0> = Wn<7:7>
4330  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4331  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4332  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4333 
4334  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4335  // %2 = ashr i16 %1, 12
4336  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4337  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4338  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4339  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4340 
4341  if (Shift >= SrcBits && IsZExt)
4342  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4343 
4344  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4345  unsigned ImmS = SrcBits - 1;
4346  static const unsigned OpcTable[2][2] = {
4347  {AArch64::SBFMWri, AArch64::SBFMXri},
4348  {AArch64::UBFMWri, AArch64::UBFMXri}
4349  };
4350  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4351  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4352  unsigned TmpReg = MRI.createVirtualRegister(RC);
4353  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4354  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4355  .addImm(0)
4356  .addReg(Op0, getKillRegState(Op0IsKill))
4357  .addImm(AArch64::sub_32);
4358  Op0 = TmpReg;
4359  Op0IsKill = true;
4360  }
4361  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4362 }
4363 
4364 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4365  bool IsZExt) {
4366  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4367 
4368  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4369  // DestVT are odd things, so test to make sure that they are both types we can
4370  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4371  // bail out to SelectionDAG.
4372  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4373  (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4374  ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4375  (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4376  return 0;
4377 
4378  unsigned Opc;
4379  unsigned Imm = 0;
4380 
4381  switch (SrcVT.SimpleTy) {
4382  default:
4383  return 0;
4384  case MVT::i1:
4385  return emiti1Ext(SrcReg, DestVT, IsZExt);
4386  case MVT::i8:
4387  if (DestVT == MVT::i64)
4388  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4389  else
4390  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4391  Imm = 7;
4392  break;
4393  case MVT::i16:
4394  if (DestVT == MVT::i64)
4395  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4396  else
4397  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4398  Imm = 15;
4399  break;
4400  case MVT::i32:
4401  assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4402  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4403  Imm = 31;
4404  break;
4405  }
4406 
4407  // Handle i8 and i16 as i32.
4408  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4409  DestVT = MVT::i32;
4410  else if (DestVT == MVT::i64) {
4411  unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4412  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4413  TII.get(AArch64::SUBREG_TO_REG), Src64)
4414  .addImm(0)
4415  .addReg(SrcReg)
4416  .addImm(AArch64::sub_32);
4417  SrcReg = Src64;
4418  }
4419 
4420  const TargetRegisterClass *RC =
4421  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4422  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4423 }
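// Example (sketch): "sext i16 %x to i32" maps to SBFMWri Wd, Wn, #0, #15
// (the SXTH pattern), while "zext i8 %x to i64" first widens the source with
// SUBREG_TO_REG and then emits UBFMXri Xd, Xn, #0, #7 (the UXTB pattern).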
4424 
4425 static bool isZExtLoad(const MachineInstr *LI) {
4426  switch (LI->getOpcode()) {
4427  default:
4428  return false;
4429  case AArch64::LDURBBi:
4430  case AArch64::LDURHHi:
4431  case AArch64::LDURWi:
4432  case AArch64::LDRBBui:
4433  case AArch64::LDRHHui:
4434  case AArch64::LDRWui:
4435  case AArch64::LDRBBroX:
4436  case AArch64::LDRHHroX:
4437  case AArch64::LDRWroX:
4438  case AArch64::LDRBBroW:
4439  case AArch64::LDRHHroW:
4440  case AArch64::LDRWroW:
4441  return true;
4442  }
4443 }
4444 
4445 static bool isSExtLoad(const MachineInstr *LI) {
4446  switch (LI->getOpcode()) {
4447  default:
4448  return false;
4449  case AArch64::LDURSBWi:
4450  case AArch64::LDURSHWi:
4451  case AArch64::LDURSBXi:
4452  case AArch64::LDURSHXi:
4453  case AArch64::LDURSWi:
4454  case AArch64::LDRSBWui:
4455  case AArch64::LDRSHWui:
4456  case AArch64::LDRSBXui:
4457  case AArch64::LDRSHXui:
4458  case AArch64::LDRSWui:
4459  case AArch64::LDRSBWroX:
4460  case AArch64::LDRSHWroX:
4461  case AArch64::LDRSBXroX:
4462  case AArch64::LDRSHXroX:
4463  case AArch64::LDRSWroX:
4464  case AArch64::LDRSBWroW:
4465  case AArch64::LDRSHWroW:
4466  case AArch64::LDRSBXroW:
4467  case AArch64::LDRSHXroW:
4468  case AArch64::LDRSWroW:
4469  return true;
4470  }
4471 }
4472 
4473 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4474  MVT SrcVT) {
4475  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4476  if (!LI || !LI->hasOneUse())
4477  return false;
4478 
4479  // Check if the load instruction has already been selected.
4480  unsigned Reg = lookUpRegForValue(LI);
4481  if (!Reg)
4482  return false;
4483 
4484  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4485  if (!MI)
4486  return false;
4487 
4488  // Check if the correct load instruction has been emitted - SelectionDAG might
4489  // have emitted a zero-extending load, but we need a sign-extending load.
4490  bool IsZExt = isa<ZExtInst>(I);
4491  const auto *LoadMI = MI;
4492  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4493  LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4494  unsigned LoadReg = MI->getOperand(1).getReg();
4495  LoadMI = MRI.getUniqueVRegDef(LoadReg);
4496  assert(LoadMI && "Expected valid instruction");
4497  }
4498  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4499  return false;
4500 
4501  // Nothing to be done.
4502  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4503  updateValueMap(I, Reg);
4504  return true;
4505  }
4506 
4507  if (IsZExt) {
4508  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4509  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4510  TII.get(AArch64::SUBREG_TO_REG), Reg64)
4511  .addImm(0)
4512  .addReg(Reg, getKillRegState(true))
4513  .addImm(AArch64::sub_32);
4514  Reg = Reg64;
4515  } else {
4516  assert((MI->getOpcode() == TargetOpcode::COPY &&
4517  MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4518  "Expected copy instruction");
4519  Reg = MI->getOperand(1).getReg();
4520  MachineBasicBlock::iterator I(MI);
4521  removeDeadCode(I, std::next(I));
4522  }
4523  updateValueMap(I, Reg);
4524  return true;
4525 }
4526 
4527 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4528  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4529  "Unexpected integer extend instruction.");
4530  MVT RetVT;
4531  MVT SrcVT;
4532  if (!isTypeSupported(I->getType(), RetVT))
4533  return false;
4534 
4535  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4536  return false;
4537 
4538  // Try to optimize already sign-/zero-extended values from load instructions.
4539  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4540  return true;
4541 
4542  unsigned SrcReg = getRegForValue(I->getOperand(0));
4543  if (!SrcReg)
4544  return false;
4545  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4546 
4547  // Try to optimize already sign-/zero-extended values from function arguments.
4548  bool IsZExt = isa<ZExtInst>(I);
4549  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4550  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4551  if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4552  unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4553  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4554  TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4555  .addImm(0)
4556  .addReg(SrcReg, getKillRegState(SrcIsKill))
4557  .addImm(AArch64::sub_32);
4558  SrcReg = ResultReg;
4559  }
4560  // Conservatively clear all kill flags from all uses, because we are
4561  // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4562  // level. The result of the instruction at IR level might have been
4563  // trivially dead, which is now no longer true.
4564  unsigned UseReg = lookUpRegForValue(I);
4565  if (UseReg)
4566  MRI.clearKillFlags(UseReg);
4567 
4568  updateValueMap(I, SrcReg);
4569  return true;
4570  }
4571  }
4572 
4573  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4574  if (!ResultReg)
4575  return false;
4576 
4577  updateValueMap(I, ResultReg);
4578  return true;
4579 }
4580 
4581 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4582  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4583  if (!DestEVT.isSimple())
4584  return false;
4585 
4586  MVT DestVT = DestEVT.getSimpleVT();
4587  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4588  return false;
4589 
4590  unsigned DivOpc;
4591  bool Is64bit = (DestVT == MVT::i64);
4592  switch (ISDOpcode) {
4593  default:
4594  return false;
4595  case ISD::SREM:
4596  DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4597  break;
4598  case ISD::UREM:
4599  DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4600  break;
4601  }
4602  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4603  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4604  if (!Src0Reg)
4605  return false;
4606  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4607 
4608  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4609  if (!Src1Reg)
4610  return false;
4611  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4612 
4613  const TargetRegisterClass *RC =
4614  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4615  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4616  Src1Reg, /*IsKill=*/false);
4617  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4618  // The remainder is computed as numerator - (quotient * denominator) using the
4619  // MSUB instruction.
4620  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4621  Src1Reg, Src1IsKill, Src0Reg,
4622  Src0IsKill);
4623  updateValueMap(I, ResultReg);
4624  return true;
4625 }
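// Illustrative selection (sketch, register names are arbitrary): for
// "%r = srem i32 %a, %b" this produces roughly
//   sdiv w8, w0, w1
//   msub w0, w8, w1, w0   ; w0 = w0 - (w8 * w1)
// with UDIV used instead of SDIV for urem.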
4626 
4627 bool AArch64FastISel::selectMul(const Instruction *I) {
4628  MVT VT;
4629  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4630  return false;
4631 
4632  if (VT.isVector())
4633  return selectBinaryOp(I, ISD::MUL);
4634 
4635  const Value *Src0 = I->getOperand(0);
4636  const Value *Src1 = I->getOperand(1);
4637  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4638  if (C->getValue().isPowerOf2())
4639  std::swap(Src0, Src1);
4640 
4641  // Try to simplify to a shift instruction.
4642  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4643  if (C->getValue().isPowerOf2()) {
4644  uint64_t ShiftVal = C->getValue().logBase2();
4645  MVT SrcVT = VT;
4646  bool IsZExt = true;
4647  if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4648  if (!isIntExtFree(ZExt)) {
4649  MVT VT;
4650  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4651  SrcVT = VT;
4652  IsZExt = true;
4653  Src0 = ZExt->getOperand(0);
4654  }
4655  }
4656  } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4657  if (!isIntExtFree(SExt)) {
4658  MVT VT;
4659  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4660  SrcVT = VT;
4661  IsZExt = false;
4662  Src0 = SExt->getOperand(0);
4663  }
4664  }
4665  }
4666 
4667  unsigned Src0Reg = getRegForValue(Src0);
4668  if (!Src0Reg)
4669  return false;
4670  bool Src0IsKill = hasTrivialKill(Src0);
4671 
4672  unsigned ResultReg =
4673  emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4674 
4675  if (ResultReg) {
4676  updateValueMap(I, ResultReg);
4677  return true;
4678  }
4679  }
4680 
4681  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4682  if (!Src0Reg)
4683  return false;
4684  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4685 
4686  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4687  if (!Src1Reg)
4688  return false;
4689  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4690 
4691  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4692 
4693  if (!ResultReg)
4694  return false;
4695 
4696  updateValueMap(I, ResultReg);
4697  return true;
4698 }
4699 
4700 bool AArch64FastISel::selectShift(const Instruction *I) {
4701  MVT RetVT;
4702  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4703  return false;
4704 
4705  if (RetVT.isVector())
4706  return selectOperator(I, I->getOpcode());
4707 
4708  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4709  unsigned ResultReg = 0;
4710  uint64_t ShiftVal = C->getZExtValue();
4711  MVT SrcVT = RetVT;
4712  bool IsZExt = I->getOpcode() != Instruction::AShr;
4713  const Value *Op0 = I->getOperand(0);
4714  if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4715  if (!isIntExtFree(ZExt)) {
4716  MVT TmpVT;
4717  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4718  SrcVT = TmpVT;
4719  IsZExt = true;
4720  Op0 = ZExt->getOperand(0);
4721  }
4722  }
4723  } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4724  if (!isIntExtFree(SExt)) {
4725  MVT TmpVT;
4726  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4727  SrcVT = TmpVT;
4728  IsZExt = false;
4729  Op0 = SExt->getOperand(0);
4730  }
4731  }
4732  }
4733 
4734  unsigned Op0Reg = getRegForValue(Op0);
4735  if (!Op0Reg)
4736  return false;
4737  bool Op0IsKill = hasTrivialKill(Op0);
4738 
4739  switch (I->getOpcode()) {
4740  default: llvm_unreachable("Unexpected instruction.");
4741  case Instruction::Shl:
4742  ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4743  break;
4744  case Instruction::AShr:
4745  ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4746  break;
4747  case Instruction::LShr:
4748  ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4749  break;
4750  }
4751  if (!ResultReg)
4752  return false;
4753 
4754  updateValueMap(I, ResultReg);
4755  return true;
4756  }
4757 
4758  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4759  if (!Op0Reg)
4760  return false;
4761  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4762 
4763  unsigned Op1Reg = getRegForValue(I->getOperand(1));
4764  if (!Op1Reg)
4765  return false;
4766  bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4767 
4768  unsigned ResultReg = 0;
4769  switch (I->getOpcode()) {
4770  default: llvm_unreachable("Unexpected instruction.");
4771  case Instruction::Shl:
4772  ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4773  break;
4774  case Instruction::AShr:
4775  ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4776  break;
4777  case Instruction::LShr:
4778  ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4779  break;
4780  }
4781 
4782  if (!ResultReg)
4783  return false;
4784 
4785  updateValueMap(I, ResultReg);
4786  return true;
4787 }
4788 
4789 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4790  MVT RetVT, SrcVT;
4791 
4792  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4793  return false;
4794  if (!isTypeLegal(I->getType(), RetVT))
4795  return false;
4796 
4797  unsigned Opc;
4798  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4799  Opc = AArch64::FMOVWSr;
4800  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4801  Opc = AArch64::FMOVXDr;
4802  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4803  Opc = AArch64::FMOVSWr;
4804  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4805  Opc = AArch64::FMOVDXr;
4806  else
4807  return false;
4808 
4809  const TargetRegisterClass *RC = nullptr;
4810  switch (RetVT.SimpleTy) {
4811  default: llvm_unreachable("Unexpected value type.");
4812  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4813  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4814  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4815  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4816  }
4817  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4818  if (!Op0Reg)
4819  return false;
4820  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4821  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4822 
4823  if (!ResultReg)
4824  return false;
4825 
4826  updateValueMap(I, ResultReg);
4827  return true;
4828 }
4829 
4830 bool AArch64FastISel::selectFRem(const Instruction *I) {
4831  MVT RetVT;
4832  if (!isTypeLegal(I->getType(), RetVT))
4833  return false;
4834 
4835  RTLIB::Libcall LC;
4836  switch (RetVT.SimpleTy) {
4837  default:
4838  return false;
4839  case MVT::f32:
4840  LC = RTLIB::REM_F32;
4841  break;
4842  case MVT::f64:
4843  LC = RTLIB::REM_F64;
4844  break;
4845  }
4846 
4847  ArgListTy Args;
4848  Args.reserve(I->getNumOperands());
4849 
4850  // Populate the argument list.
4851  for (auto &Arg : I->operands()) {
4852  ArgListEntry Entry;
4853  Entry.Val = Arg;
4854  Entry.Ty = Arg->getType();
4855  Args.push_back(Entry);
4856  }
4857 
4858  CallLoweringInfo CLI;
4859  MCContext &Ctx = MF->getContext();
4860  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4861  TLI.getLibcallName(LC), std::move(Args));
4862  if (!lowerCallTo(CLI))
4863  return false;
4864  updateValueMap(I, CLI.ResultReg);
4865  return true;
4866 }
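// frem has no direct AArch64 instruction, so the selection above lowers it to
// the REM_F32/REM_F64 runtime library calls (typically fmodf/fmod) through the
// generic call lowering path.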
4867 
4868 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4869  MVT VT;
4870  if (!isTypeLegal(I->getType(), VT))
4871  return false;
4872 
4873  if (!isa<ConstantInt>(I->getOperand(1)))
4874  return selectBinaryOp(I, ISD::SDIV);
4875 
4876  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4877  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4878  !(C.isPowerOf2() || (-C).isPowerOf2()))
4879  return selectBinaryOp(I, ISD::SDIV);
4880 
4881  unsigned Lg2 = C.countTrailingZeros();
4882  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4883  if (!Src0Reg)
4884  return false;
4885  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4886 
4887  if (cast<BinaryOperator>(I)->isExact()) {
4888  unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4889  if (!ResultReg)
4890  return false;
4891  updateValueMap(I, ResultReg);
4892  return true;
4893  }
4894 
4895  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4896  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4897  if (!AddReg)
4898  return false;
4899 
4900  // (Src0 < 0) ? Pow2 - 1 : 0;
4901  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4902  return false;
4903 
4904  unsigned SelectOpc;
4905  const TargetRegisterClass *RC;
4906  if (VT == MVT::i64) {
4907  SelectOpc = AArch64::CSELXr;
4908  RC = &AArch64::GPR64RegClass;
4909  } else {
4910  SelectOpc = AArch64::CSELWr;
4911  RC = &AArch64::GPR32RegClass;
4912  }
4913  unsigned SelectReg =
4914  fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4915  Src0IsKill, AArch64CC::LT);
4916  if (!SelectReg)
4917  return false;
4918 
4919  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4920  // negate the result.
4921  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4922  unsigned ResultReg;
4923  if (C.isNegative())
4924  ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4925  SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4926  else
4927  ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4928 
4929  if (!ResultReg)
4930  return false;
4931 
4932  updateValueMap(I, ResultReg);
4933  return true;
4934 }
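// Worked example (sketch, register names are arbitrary): for a non-exact
// "%q = sdiv i32 %x, 8" the code above emits roughly
//   add  w8, w0, #7          ; x + (2^3 - 1)
//   cmp  w0, #0
//   csel w8, w8, w0, lt      ; use the biased value only when x is negative
//   asr  w0, w8, #3
// and additionally negates the shifted result when the divisor is -8.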
4935 
4936 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4937 /// have to duplicate it for AArch64, because otherwise we would fail during the
4938 /// sign-extend emission.
4939 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4940  unsigned IdxN = getRegForValue(Idx);
4941  if (IdxN == 0)
4942  // Unhandled operand. Halt "fast" selection and bail.
4943  return std::pair<unsigned, bool>(0, false);
4944 
4945  bool IdxNIsKill = hasTrivialKill(Idx);
4946 
4947  // If the index is smaller or larger than intptr_t, truncate or extend it.
4948  MVT PtrVT = TLI.getPointerTy(DL);
4949  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4950  if (IdxVT.bitsLT(PtrVT)) {
4951  IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4952  IdxNIsKill = true;
4953  } else if (IdxVT.bitsGT(PtrVT))
4954  llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4955  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4956 }
4957 
4958 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4959 /// duplicate it for AArch64, because otherwise we would bail out even for
4960 /// simple cases. This is because the standard fastEmit functions don't cover
4961  /// MUL at all and ADD is lowered very inefficiently.
4962 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4963  unsigned N = getRegForValue(I->getOperand(0));
4964  if (!N)
4965  return false;
4966  bool NIsKill = hasTrivialKill(I->getOperand(0));
4967 
4968  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4969  // into a single N = N + TotalOffset.
4970  uint64_t TotalOffs = 0;
4971  MVT VT = TLI.getPointerTy(DL);
4972  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4973  GTI != E; ++GTI) {
4974  const Value *Idx = GTI.getOperand();
4975  if (auto *StTy = GTI.getStructTypeOrNull()) {
4976  unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4977  // N = N + Offset
4978  if (Field)
4979  TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4980  } else {
4981  Type *Ty = GTI.getIndexedType();
4982 
4983  // If this is a constant subscript, handle it quickly.
4984  if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4985  if (CI->isZero())
4986  continue;
4987  // N = N + Offset
4988  TotalOffs +=
4989  DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4990  continue;
4991  }
4992  if (TotalOffs) {
4993  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4994  if (!N)
4995  return false;
4996  NIsKill = true;
4997  TotalOffs = 0;
4998  }
4999 
5000  // N = N + Idx * ElementSize;
5001  uint64_t ElementSize = DL.getTypeAllocSize(Ty);
5002  std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
5003  unsigned IdxN = Pair.first;
5004  bool IdxNIsKill = Pair.second;
5005  if (!IdxN)
5006  return false;
5007 
5008  if (ElementSize != 1) {
5009  unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5010  if (!C)
5011  return false;
5012  IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
5013  if (!IdxN)
5014  return false;
5015  IdxNIsKill = true;
5016  }
5017  N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
5018  if (!N)
5019  return false;
5020  }
5021  }
5022  if (TotalOffs) {
5023  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
5024  if (!N)
5025  return false;
5026  }
5027  updateValueMap(I, N);
5028  return true;
5029 }
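// Illustrative lowering (sketch): for "getelementptr i32, i32* %p, i64 %i"
// the element size is 4, so the address becomes N = %p + %i * 4, emitted as a
// constant materialization plus emitMul_rr and an ADD; constant indices are
// instead accumulated into TotalOffs and emitted as a single ADD immediate.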
5030 
5031 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5032  assert(TM.getOptLevel() == CodeGenOpt::None &&
5033  "cmpxchg survived AtomicExpand at optlevel > -O0");
5034 
5035  auto *RetPairTy = cast<StructType>(I->getType());
5036  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5037  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5038  "cmpxchg has a non-i1 status result");
5039 
5040  MVT VT;
5041  if (!isTypeLegal(RetTy, VT))
5042  return false;
5043 
5044  const TargetRegisterClass *ResRC;
5045  unsigned Opc, CmpOpc;
5046  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5047  // extractvalue selection doesn't support that.
5048  if (VT == MVT::i32) {
5049  Opc = AArch64::CMP_SWAP_32;
5050  CmpOpc = AArch64::SUBSWrs;
5051  ResRC = &AArch64::GPR32RegClass;
5052  } else if (VT == MVT::i64) {
5053  Opc = AArch64::CMP_SWAP_64;
5054  CmpOpc = AArch64::SUBSXrs;
5055  ResRC = &AArch64::GPR64RegClass;
5056  } else {
5057  return false;
5058  }
5059 
5060  const MCInstrDesc &II = TII.get(Opc);
5061 
5062  const unsigned AddrReg = constrainOperandRegClass(
5063  II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5064  const unsigned DesiredReg = constrainOperandRegClass(
5065  II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5066  const unsigned NewReg = constrainOperandRegClass(
5067  II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5068 
5069  const unsigned ResultReg1 = createResultReg(ResRC);
5070  const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5071  const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5072 
5073  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5074  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5075  .addDef(ResultReg1)
5076  .addDef(ScratchReg)
5077  .addUse(AddrReg)
5078  .addUse(DesiredReg)
5079  .addUse(NewReg);
5080 
5081  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5082  .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5083  .addUse(ResultReg1)
5084  .addUse(DesiredReg)
5085  .addImm(0);
5086 
5087  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5088  .addDef(ResultReg2)
5089  .addUse(AArch64::WZR)
5090  .addUse(AArch64::WZR)
5091  .addImm(AArch64CC::NE);
5092 
5093  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5094  updateValueMap(I, ResultReg1, 2);
5095  return true;
5096 }
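For orientation, a hedged sketch of the sequence the routine above builds for an i32 cmpxchg (virtual register names are illustrative, not from the file):

    // IR:  %pair = cmpxchg i32* %ptr, i32 %desired, i32 %new seq_cst seq_cst
    //
    // Emitted MIR, before the CMP_SWAP_32 pseudo is later expanded into an
    // exclusive load/store loop:
    //   %res, %scratch = CMP_SWAP_32 %addr, %desired, %new
    //   $wzr = SUBSWrs %res, %desired, 0        ; compare loaded value, set NZCV
    //   %success = CSINCWr $wzr, $wzr, ne       ; 1 if the values matched, else 0
    //
    // updateValueMap(I, ResultReg1, 2) then exposes %res and %success as the two
    // consecutive results of the struct-typed cmpxchg.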
5097 
5098 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5099  switch (I->getOpcode()) {
5100  default:
5101  break;
5102  case Instruction::Add:
5103  case Instruction::Sub:
5104  return selectAddSub(I);
5105  case Instruction::Mul:
5106  return selectMul(I);
5107  case Instruction::SDiv:
5108  return selectSDiv(I);
5109  case Instruction::SRem:
5110  if (!selectBinaryOp(I, ISD::SREM))
5111  return selectRem(I, ISD::SREM);
5112  return true;
5113  case Instruction::URem:
5114  if (!selectBinaryOp(I, ISD::UREM))
5115  return selectRem(I, ISD::UREM);
5116  return true;
5117  case Instruction::Shl:
5118  case Instruction::LShr:
5119  case Instruction::AShr:
5120  return selectShift(I);
5121  case Instruction::And:
5122  case Instruction::Or:
5123  case Instruction::Xor:
5124  return selectLogicalOp(I);
5125  case Instruction::Br:
5126  return selectBranch(I);
5127  case Instruction::IndirectBr:
5128  return selectIndirectBr(I);
5129  case Instruction::BitCast:
5130  if (!FastISel::selectBitCast(I))
5131  return selectBitCast(I);
5132  return true;
5133  case Instruction::FPToSI:
5134  if (!selectCast(I, ISD::FP_TO_SINT))
5135  return selectFPToInt(I, /*Signed=*/true);
5136  return true;
5137  case Instruction::FPToUI:
5138  return selectFPToInt(I, /*Signed=*/false);
5139  case Instruction::ZExt:
5140  case Instruction::SExt:
5141  return selectIntExt(I);
5142  case Instruction::Trunc:
5143  if (!selectCast(I, ISD::TRUNCATE))
5144  return selectTrunc(I);
5145  return true;
5146  case Instruction::FPExt:
5147  return selectFPExt(I);
5148  case Instruction::FPTrunc:
5149  return selectFPTrunc(I);
5150  case Instruction::SIToFP:
5151  if (!selectCast(I, ISD::SINT_TO_FP))
5152  return selectIntToFP(I, /*Signed=*/true);
5153  return true;
5154  case Instruction::UIToFP:
5155  return selectIntToFP(I, /*Signed=*/false);
5156  case Instruction::Load:
5157  return selectLoad(I);
5158  case Instruction::Store:
5159  return selectStore(I);
5160  case Instruction::FCmp:
5161  case Instruction::ICmp:
5162  return selectCmp(I);
5163  case Instruction::Select:
5164  return selectSelect(I);
5165  case Instruction::Ret:
5166  return selectRet(I);
5167  case Instruction::FRem:
5168  return selectFRem(I);
5169  case Instruction::GetElementPtr:
5170  return selectGetElementPtr(I);
5171  case Instruction::AtomicCmpXchg:
5172  return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5173  }
5174 
5175  // Silence warnings.
5176  (void)&CC_AArch64_DarwinPCS_VarArg;
5177  (void)&CC_AArch64_Win64_VarArg;
5178 
5179  // Fall back to target-independent instruction selection.
5180  return selectOperator(I, I->getOpcode());
5181 }
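One detail of the dispatch worth spelling out (editorial note): several cases try the shared FastISel helper first and only fall back to the AArch64-specific routine when the generic path gives up, for example:

    // Generic table-driven selection first, target-specific fallback second:
    case Instruction::SRem:
      if (!selectBinaryOp(I, ISD::SREM))   // shared FastISel path
        return selectRem(I, ISD::SREM);    // AArch64 divide + multiply-subtract expansion
      return true;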
5182 
5183 namespace llvm {
5184 
5185 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5186  const TargetLibraryInfo *LibInfo) {
5187  return new AArch64FastISel(FuncInfo, LibInfo);
5188 }
5189 
5190 } // end namespace llvm
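For context, the hook that reaches this factory (editorial sketch of the override living in AArch64ISelLowering.cpp, not part of this file):

    FastISel *
    AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                          const TargetLibraryInfo *libInfo) const {
      return AArch64::createFastISel(funcInfo, libInfo);
    }

SelectionDAGISel queries this override when fast instruction selection is enabled (e.g. at -O0) and falls back to the DAG path whenever a fastSelectInstruction handler above returns false.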