LLVM 8.0.1
X86FastISel.cpp
1 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the X86-specific support for the FastISel class. Much
11 // of the target-specific code is generated by tablegen in the file
12 // X86GenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "X86.h"
17 #include "X86CallingConv.h"
18 #include "X86InstrBuilder.h"
19 #include "X86InstrInfo.h"
20 #include "X86MachineFunctionInfo.h"
21 #include "X86RegisterInfo.h"
22 #include "X86Subtarget.h"
23 #include "X86TargetMachine.h"
24 #include "llvm/Analysis/BranchProbabilityInfo.h"
25 #include "llvm/CodeGen/FastISel.h"
26 #include "llvm/CodeGen/FunctionLoweringInfo.h"
27 #include "llvm/CodeGen/MachineConstantPool.h"
28 #include "llvm/CodeGen/MachineFrameInfo.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/IR/CallSite.h"
31 #include "llvm/IR/CallingConv.h"
32 #include "llvm/IR/DebugInfo.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GetElementPtrTypeIterator.h"
35 #include "llvm/IR/GlobalAlias.h"
36 #include "llvm/IR/GlobalVariable.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/IntrinsicInst.h"
39 #include "llvm/IR/Operator.h"
40 #include "llvm/MC/MCAsmInfo.h"
41 #include "llvm/MC/MCSymbol.h"
44 using namespace llvm;
45 
46 namespace {
47 
48 class X86FastISel final : public FastISel {
49  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
50  /// make the right decision when generating code for different targets.
51  const X86Subtarget *Subtarget;
52 
53  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
54  /// floating point ops.
55  /// When SSE is available, use it for f32 operations.
56  /// When SSE2 is available, use it for f64 operations.
57  bool X86ScalarSSEf64;
58  bool X86ScalarSSEf32;
59 
60 public:
61  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
62  const TargetLibraryInfo *libInfo)
63  : FastISel(funcInfo, libInfo) {
64  Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
65  X86ScalarSSEf64 = Subtarget->hasSSE2();
66  X86ScalarSSEf32 = Subtarget->hasSSE1();
67  }
68 
69  bool fastSelectInstruction(const Instruction *I) override;
70 
71  /// The specified machine instr operand is a vreg, and that
72  /// vreg is being provided by the specified load instruction. If possible,
73  /// try to fold the load as an operand to the instruction, returning true on
74  /// success.
75  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
76  const LoadInst *LI) override;
77 
78  bool fastLowerArguments() override;
79  bool fastLowerCall(CallLoweringInfo &CLI) override;
80  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
81 
82 #include "X86GenFastISel.inc"
83 
84 private:
85  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
86  const DebugLoc &DL);
87 
88  bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
89  unsigned &ResultReg, unsigned Alignment = 1);
90 
91  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
92  MachineMemOperand *MMO = nullptr, bool Aligned = false);
93  bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
94  X86AddressMode &AM,
95  MachineMemOperand *MMO = nullptr, bool Aligned = false);
96 
97  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
98  unsigned &ResultReg);
99 
100  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
101  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
102 
103  bool X86SelectLoad(const Instruction *I);
104 
105  bool X86SelectStore(const Instruction *I);
106 
107  bool X86SelectRet(const Instruction *I);
108 
109  bool X86SelectCmp(const Instruction *I);
110 
111  bool X86SelectZExt(const Instruction *I);
112 
113  bool X86SelectSExt(const Instruction *I);
114 
115  bool X86SelectBranch(const Instruction *I);
116 
117  bool X86SelectShift(const Instruction *I);
118 
119  bool X86SelectDivRem(const Instruction *I);
120 
121  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
122 
123  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
124 
125  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
126 
127  bool X86SelectSelect(const Instruction *I);
128 
129  bool X86SelectTrunc(const Instruction *I);
130 
131  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
132  const TargetRegisterClass *RC);
133 
134  bool X86SelectFPExt(const Instruction *I);
135  bool X86SelectFPTrunc(const Instruction *I);
136  bool X86SelectSIToFP(const Instruction *I);
137  bool X86SelectUIToFP(const Instruction *I);
138  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);
139 
140  const X86InstrInfo *getInstrInfo() const {
141  return Subtarget->getInstrInfo();
142  }
143  const X86TargetMachine *getTargetMachine() const {
144  return static_cast<const X86TargetMachine *>(&TM);
145  }
146 
147  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
148 
149  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
150  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
151  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
152  unsigned fastMaterializeConstant(const Constant *C) override;
153 
154  unsigned fastMaterializeAlloca(const AllocaInst *C) override;
155 
156  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
157 
158  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
159  /// computed in an SSE register, not on the X87 floating point stack.
160  bool isScalarFPTypeInSSEReg(EVT VT) const {
161  return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
162  (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
163  }
164 
165  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
166 
167  bool IsMemcpySmall(uint64_t Len);
168 
169  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
170  X86AddressMode SrcAM, uint64_t Len);
171 
172  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
173  const Value *Cond);
174 
175  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
176  X86AddressMode &AM);
177 
178  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
179  const TargetRegisterClass *RC, unsigned Op0,
180  bool Op0IsKill, unsigned Op1, bool Op1IsKill,
181  unsigned Op2, bool Op2IsKill, unsigned Op3,
182  bool Op3IsKill);
183 };
184 
185 } // end anonymous namespace.
186 
187 static std::pair<unsigned, bool>
188 getX86SSEConditionCode(CmpInst::Predicate Predicate) {
189  unsigned CC;
190  bool NeedSwap = false;
191 
192  // SSE Condition code mapping:
193  // 0 - EQ
194  // 1 - LT
195  // 2 - LE
196  // 3 - UNORD
197  // 4 - NEQ
198  // 5 - NLT
199  // 6 - NLE
200  // 7 - ORD
201  switch (Predicate) {
202  default: llvm_unreachable("Unexpected predicate");
203  case CmpInst::FCMP_OEQ: CC = 0; break;
204  case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH;
205  case CmpInst::FCMP_OLT: CC = 1; break;
206  case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH;
207  case CmpInst::FCMP_OLE: CC = 2; break;
208  case CmpInst::FCMP_UNO: CC = 3; break;
209  case CmpInst::FCMP_UNE: CC = 4; break;
210  case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
211  case CmpInst::FCMP_UGE: CC = 5; break;
212  case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
213  case CmpInst::FCMP_UGT: CC = 6; break;
214  case CmpInst::FCMP_ORD: CC = 7; break;
215  case CmpInst::FCMP_UEQ: CC = 8; break;
216  case CmpInst::FCMP_ONE: CC = 12; break;
217  }
218 
219  return std::make_pair(CC, NeedSwap);
220 }
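// For illustration: with the table above, FCMP_OGT yields {CC = 1 (LT),
// NeedSwap = true}, i.e. "a > b" is emitted as an SSE LT compare with the
// operands swapped ("b < a"); callers use the NeedSwap flag exactly this way.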
221 
222 /// Adds a complex addressing mode to the given machine instr builder.
223 /// Note, this will constrain the index register. If it's not possible to
224 /// constrain the given index register, then a new one will be created. The
225 /// IndexReg field of the addressing mode will be updated to match in this case.
226 const MachineInstrBuilder &
227 X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
228  X86AddressMode &AM) {
229  // First constrain the index register. It needs to be a GR64_NOSP.
230  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
231  MIB->getNumOperands() +
232  X86::AddrIndexReg);
233  return ::addFullAddress(MIB, AM);
234 }
235 
236 /// Check if it is possible to fold the condition from the XALU intrinsic
237 /// into the user. The condition code will only be updated on success.
238 bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
239  const Value *Cond) {
240  if (!isa<ExtractValueInst>(Cond))
241  return false;
242 
243  const auto *EV = cast<ExtractValueInst>(Cond);
244  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
245  return false;
246 
247  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
248  MVT RetVT;
249  const Function *Callee = II->getCalledFunction();
250  Type *RetTy =
251  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
252  if (!isTypeLegal(RetTy, RetVT))
253  return false;
254 
255  if (RetVT != MVT::i32 && RetVT != MVT::i64)
256  return false;
257 
258  X86::CondCode TmpCC;
259  switch (II->getIntrinsicID()) {
260  default: return false;
261  case Intrinsic::sadd_with_overflow:
262  case Intrinsic::ssub_with_overflow:
263  case Intrinsic::smul_with_overflow:
264  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
265  case Intrinsic::uadd_with_overflow:
266  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
267  }
268 
269  // Check if both instructions are in the same basic block.
270  if (II->getParent() != I->getParent())
271  return false;
272 
273  // Make sure nothing is in the way
274  BasicBlock::const_iterator Start(I);
275  BasicBlock::const_iterator End(II);
276  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
277  // We only expect extractvalue instructions between the intrinsic and the
278  // instruction to be selected.
279  if (!isa<ExtractValueInst>(Itr))
280  return false;
281 
282  // Check that the extractvalue operand comes from the intrinsic.
283  const auto *EVI = cast<ExtractValueInst>(Itr);
284  if (EVI->getAggregateOperand() != II)
285  return false;
286  }
287 
288  CC = TmpCC;
289  return true;
290 }
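// For illustration, the IR shape this folding looks for is roughly
// (names are placeholders):
//   %res  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %sum  = extractvalue { i32, i1 } %res, 0
//   %obit = extractvalue { i32, i1 } %res, 1
//   br i1 %obit, label %overflow, label %cont
// so the branch (or select) can test the flags left by the add via
// X86::COND_O instead of materializing %obit in a register.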
291 
292 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
293  EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
294  if (evt == MVT::Other || !evt.isSimple())
295  // Unhandled type. Halt "fast" selection and bail.
296  return false;
297 
298  VT = evt.getSimpleVT();
299  // For now, require SSE/SSE2 for performing floating-point operations,
300  // since x87 requires additional work.
301  if (VT == MVT::f64 && !X86ScalarSSEf64)
302  return false;
303  if (VT == MVT::f32 && !X86ScalarSSEf32)
304  return false;
305  // Similarly, no f80 support yet.
306  if (VT == MVT::f80)
307  return false;
308  // We only handle legal types. For example, on x86-32 the instruction
309  // selector contains all of the 64-bit instructions from x86-64,
310  // under the assumption that i64 won't be used if the target doesn't
311  // support it.
312  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
313 }
314 
315 #include "X86GenCallingConv.inc"
316 
317 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
318 /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
319 /// Return true and the result register by reference if it is possible.
320 bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
321  MachineMemOperand *MMO, unsigned &ResultReg,
322  unsigned Alignment) {
323  bool HasSSE41 = Subtarget->hasSSE41();
324  bool HasAVX = Subtarget->hasAVX();
325  bool HasAVX2 = Subtarget->hasAVX2();
326  bool HasAVX512 = Subtarget->hasAVX512();
327  bool HasVLX = Subtarget->hasVLX();
328  bool IsNonTemporal = MMO && MMO->isNonTemporal();
329 
330  // Get opcode and regclass of the output for the given load instruction.
331  unsigned Opc = 0;
332  const TargetRegisterClass *RC = nullptr;
333  switch (VT.getSimpleVT().SimpleTy) {
334  default: return false;
335  case MVT::i1:
336  case MVT::i8:
337  Opc = X86::MOV8rm;
338  RC = &X86::GR8RegClass;
339  break;
340  case MVT::i16:
341  Opc = X86::MOV16rm;
342  RC = &X86::GR16RegClass;
343  break;
344  case MVT::i32:
345  Opc = X86::MOV32rm;
346  RC = &X86::GR32RegClass;
347  break;
348  case MVT::i64:
349  // Must be in x86-64 mode.
350  Opc = X86::MOV64rm;
351  RC = &X86::GR64RegClass;
352  break;
353  case MVT::f32:
354  if (X86ScalarSSEf32) {
355  Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
356  RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
357  } else {
358  Opc = X86::LD_Fp32m;
359  RC = &X86::RFP32RegClass;
360  }
361  break;
362  case MVT::f64:
363  if (X86ScalarSSEf64) {
364  Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
365  RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
366  } else {
367  Opc = X86::LD_Fp64m;
368  RC = &X86::RFP64RegClass;
369  }
370  break;
371  case MVT::f80:
372  // No f80 support yet.
373  return false;
374  case MVT::v4f32:
375  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
376  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
377  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
378  else if (Alignment >= 16)
379  Opc = HasVLX ? X86::VMOVAPSZ128rm :
380  HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
381  else
382  Opc = HasVLX ? X86::VMOVUPSZ128rm :
383  HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
384  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
385  break;
386  case MVT::v2f64:
387  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
388  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
389  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
390  else if (Alignment >= 16)
391  Opc = HasVLX ? X86::VMOVAPDZ128rm :
392  HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
393  else
394  Opc = HasVLX ? X86::VMOVUPDZ128rm :
395  HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
396  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
397  break;
398  case MVT::v4i32:
399  case MVT::v2i64:
400  case MVT::v8i16:
401  case MVT::v16i8:
402  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
403  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
404  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
405  else if (Alignment >= 16)
406  Opc = HasVLX ? X86::VMOVDQA64Z128rm :
407  HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
408  else
409  Opc = HasVLX ? X86::VMOVDQU64Z128rm :
410  HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
411  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
412  break;
413  case MVT::v8f32:
414  assert(HasAVX);
415  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
416  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
417  else if (IsNonTemporal && Alignment >= 16)
418  return false; // Force split for X86::VMOVNTDQArm
419  else if (Alignment >= 32)
420  Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
421  else
422  Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
423  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
424  break;
425  case MVT::v4f64:
426  assert(HasAVX);
427  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
428  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
429  else if (IsNonTemporal && Alignment >= 16)
430  return false; // Force split for X86::VMOVNTDQArm
431  else if (Alignment >= 32)
432  Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
433  else
434  Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
435  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
436  break;
437  case MVT::v8i32:
438  case MVT::v4i64:
439  case MVT::v16i16:
440  case MVT::v32i8:
441  assert(HasAVX);
442  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
443  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
444  else if (IsNonTemporal && Alignment >= 16)
445  return false; // Force split for X86::VMOVNTDQArm
446  else if (Alignment >= 32)
447  Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
448  else
449  Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
450  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
451  break;
452  case MVT::v16f32:
453  assert(HasAVX512);
454  if (IsNonTemporal && Alignment >= 64)
455  Opc = X86::VMOVNTDQAZrm;
456  else
457  Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
458  RC = &X86::VR512RegClass;
459  break;
460  case MVT::v8f64:
461  assert(HasAVX512);
462  if (IsNonTemporal && Alignment >= 64)
463  Opc = X86::VMOVNTDQAZrm;
464  else
465  Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
466  RC = &X86::VR512RegClass;
467  break;
468  case MVT::v8i64:
469  case MVT::v16i32:
470  case MVT::v32i16:
471  case MVT::v64i8:
472  assert(HasAVX512);
473  // Note: There are a lot more choices based on type with AVX-512, but
474  // there's really no advantage when the load isn't masked.
475  if (IsNonTemporal && Alignment >= 64)
476  Opc = X86::VMOVNTDQAZrm;
477  else
478  Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
479  RC = &X86::VR512RegClass;
480  break;
481  }
482 
483  ResultReg = createResultReg(RC);
484  MachineInstrBuilder MIB =
485  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
486  addFullAddress(MIB, AM);
487  if (MMO)
488  MIB->addMemOperand(*FuncInfo.MF, MMO);
489  return true;
490 }
491 
492 /// X86FastEmitStore - Emit a machine instruction to store a value Val of
493 /// type VT. The address is either pre-computed, consisting of a base ptr, Ptr
494 /// and a displacement offset, or a GlobalAddress,
495 /// i.e. V. Return true if it is possible.
496 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
497  X86AddressMode &AM,
498  MachineMemOperand *MMO, bool Aligned) {
499  bool HasSSE1 = Subtarget->hasSSE1();
500  bool HasSSE2 = Subtarget->hasSSE2();
501  bool HasSSE4A = Subtarget->hasSSE4A();
502  bool HasAVX = Subtarget->hasAVX();
503  bool HasAVX512 = Subtarget->hasAVX512();
504  bool HasVLX = Subtarget->hasVLX();
505  bool IsNonTemporal = MMO && MMO->isNonTemporal();
506 
507  // Get opcode and regclass of the output for the given store instruction.
508  unsigned Opc = 0;
509  switch (VT.getSimpleVT().SimpleTy) {
510  case MVT::f80: // No f80 support yet.
511  default: return false;
512  case MVT::i1: {
513  // Mask out all but lowest bit.
514  unsigned AndResult = createResultReg(&X86::GR8RegClass);
515  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
516  TII.get(X86::AND8ri), AndResult)
517  .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
518  ValReg = AndResult;
519  LLVM_FALLTHROUGH; // handle i1 as i8.
520  }
521  case MVT::i8: Opc = X86::MOV8mr; break;
522  case MVT::i16: Opc = X86::MOV16mr; break;
523  case MVT::i32:
524  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
525  break;
526  case MVT::i64:
527  // Must be in x86-64 mode.
528  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
529  break;
530  case MVT::f32:
531  if (X86ScalarSSEf32) {
532  if (IsNonTemporal && HasSSE4A)
533  Opc = X86::MOVNTSS;
534  else
535  Opc = HasAVX512 ? X86::VMOVSSZmr :
536  HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
537  } else
538  Opc = X86::ST_Fp32m;
539  break;
540  case MVT::f64:
541  if (X86ScalarSSEf32) {
542  if (IsNonTemporal && HasSSE4A)
543  Opc = X86::MOVNTSD;
544  else
545  Opc = HasAVX512 ? X86::VMOVSDZmr :
546  HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
547  } else
548  Opc = X86::ST_Fp64m;
549  break;
550  case MVT::x86mmx:
551  Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
552  break;
553  case MVT::v4f32:
554  if (Aligned) {
555  if (IsNonTemporal)
556  Opc = HasVLX ? X86::VMOVNTPSZ128mr :
557  HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
558  else
559  Opc = HasVLX ? X86::VMOVAPSZ128mr :
560  HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
561  } else
562  Opc = HasVLX ? X86::VMOVUPSZ128mr :
563  HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
564  break;
565  case MVT::v2f64:
566  if (Aligned) {
567  if (IsNonTemporal)
568  Opc = HasVLX ? X86::VMOVNTPDZ128mr :
569  HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
570  else
571  Opc = HasVLX ? X86::VMOVAPDZ128mr :
572  HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
573  } else
574  Opc = HasVLX ? X86::VMOVUPDZ128mr :
575  HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
576  break;
577  case MVT::v4i32:
578  case MVT::v2i64:
579  case MVT::v8i16:
580  case MVT::v16i8:
581  if (Aligned) {
582  if (IsNonTemporal)
583  Opc = HasVLX ? X86::VMOVNTDQZ128mr :
584  HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
585  else
586  Opc = HasVLX ? X86::VMOVDQA64Z128mr :
587  HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
588  } else
589  Opc = HasVLX ? X86::VMOVDQU64Z128mr :
590  HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
591  break;
592  case MVT::v8f32:
593  assert(HasAVX);
594  if (Aligned) {
595  if (IsNonTemporal)
596  Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
597  else
598  Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
599  } else
600  Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
601  break;
602  case MVT::v4f64:
603  assert(HasAVX);
604  if (Aligned) {
605  if (IsNonTemporal)
606  Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
607  else
608  Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
609  } else
610  Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
611  break;
612  case MVT::v8i32:
613  case MVT::v4i64:
614  case MVT::v16i16:
615  case MVT::v32i8:
616  assert(HasAVX);
617  if (Aligned) {
618  if (IsNonTemporal)
619  Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
620  else
621  Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
622  } else
623  Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
624  break;
625  case MVT::v16f32:
626  assert(HasAVX512);
627  if (Aligned)
628  Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
629  else
630  Opc = X86::VMOVUPSZmr;
631  break;
632  case MVT::v8f64:
633  assert(HasAVX512);
634  if (Aligned) {
635  Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
636  } else
637  Opc = X86::VMOVUPDZmr;
638  break;
639  case MVT::v8i64:
640  case MVT::v16i32:
641  case MVT::v32i16:
642  case MVT::v64i8:
643  assert(HasAVX512);
644  // Note: There are a lot more choices based on type with AVX-512, but
645  // there's really no advantage when the store isn't masked.
646  if (Aligned)
647  Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
648  else
649  Opc = X86::VMOVDQU64Zmr;
650  break;
651  }
652 
653  const MCInstrDesc &Desc = TII.get(Opc);
654  // Some of the instructions in the previous switch use FR128 instead
655  // of FR32 for ValReg. Make sure the register we feed the instruction
656  // matches its register class constraints.
657  // Note: This is fine to do a copy from FR32 to FR128, this is the
658  // same registers behind the scene and actually why it did not trigger
659  // any bugs before.
660  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
661  MachineInstrBuilder MIB =
662  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
663  addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
664  if (MMO)
665  MIB->addMemOperand(*FuncInfo.MF, MMO);
666 
667  return true;
668 }
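// For illustration, an i1 store goes through the masking case above: the
// value is first ANDed with 1 (AND8ri) and then stored as a plain byte
// (MOV8mr), roughly "andb $1, %al; movb %al, (%rcx)" with placeholder
// registers.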
669 
670 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
671  X86AddressMode &AM,
672  MachineMemOperand *MMO, bool Aligned) {
673  // Handle 'null' like i32/i64 0.
674  if (isa<ConstantPointerNull>(Val))
675  Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
676 
677  // If this is a store of a simple constant, fold the constant into the store.
678  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
679  unsigned Opc = 0;
680  bool Signed = true;
681  switch (VT.getSimpleVT().SimpleTy) {
682  default: break;
683  case MVT::i1:
684  Signed = false;
685  LLVM_FALLTHROUGH; // Handle as i8.
686  case MVT::i8: Opc = X86::MOV8mi; break;
687  case MVT::i16: Opc = X86::MOV16mi; break;
688  case MVT::i32: Opc = X86::MOV32mi; break;
689  case MVT::i64:
690  // Must be a 32-bit sign extended value.
691  if (isInt<32>(CI->getSExtValue()))
692  Opc = X86::MOV64mi32;
693  break;
694  }
695 
696  if (Opc) {
697  MachineInstrBuilder MIB =
698  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
699  addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
700  : CI->getZExtValue());
701  if (MMO)
702  MIB->addMemOperand(*FuncInfo.MF, MMO);
703  return true;
704  }
705  }
706 
707  unsigned ValReg = getRegForValue(Val);
708  if (ValReg == 0)
709  return false;
710 
711  bool ValKill = hasTrivialKill(Val);
712  return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
713 }
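// For illustration, "store i32 42, i32* %p" is handled by the constant path
// above and becomes a single MOV32mi ("movl $42, (%reg)", register being a
// placeholder) without first materializing 42 in a register.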
714 
715 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
716 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
717 /// ISD::SIGN_EXTEND).
718 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
719  unsigned Src, EVT SrcVT,
720  unsigned &ResultReg) {
721  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
722  Src, /*TODO: Kill=*/false);
723  if (RR == 0)
724  return false;
725 
726  ResultReg = RR;
727  return true;
728 }
729 
730 bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
731  // Handle constant address.
732  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
733  // Can't handle alternate code models yet.
734  if (TM.getCodeModel() != CodeModel::Small)
735  return false;
736 
737  // Can't handle TLS yet.
738  if (GV->isThreadLocal())
739  return false;
740 
741  // Can't handle !absolute_symbol references yet.
742  if (GV->isAbsoluteSymbolRef())
743  return false;
744 
745  // RIP-relative addresses can't have additional register operands, so if
746  // we've already folded stuff into the addressing mode, just force the
747  // global value into its own register, which we can use as the basereg.
748  if (!Subtarget->isPICStyleRIPRel() ||
749  (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
750  // Okay, we've committed to selecting this global. Set up the address.
751  AM.GV = GV;
752 
753  // Allow the subtarget to classify the global.
754  unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
755 
756  // If this reference is relative to the pic base, set it now.
757  if (isGlobalRelativeToPICBase(GVFlags)) {
758  // FIXME: How do we know Base.Reg is free??
759  AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
760  }
761 
762  // Unless the ABI requires an extra load, return a direct reference to
763  // the global.
764  if (!isGlobalStubReference(GVFlags)) {
765  if (Subtarget->isPICStyleRIPRel()) {
766  // Use rip-relative addressing if we can. Above we verified that the
767  // base and index registers are unused.
768  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
769  AM.Base.Reg = X86::RIP;
770  }
771  AM.GVOpFlags = GVFlags;
772  return true;
773  }
774 
775  // Ok, we need to do a load from a stub. If we've already loaded from
776  // this stub, reuse the loaded pointer, otherwise emit the load now.
777  DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
778  unsigned LoadReg;
779  if (I != LocalValueMap.end() && I->second != 0) {
780  LoadReg = I->second;
781  } else {
782  // Issue load from stub.
783  unsigned Opc = 0;
784  const TargetRegisterClass *RC = nullptr;
785  X86AddressMode StubAM;
786  StubAM.Base.Reg = AM.Base.Reg;
787  StubAM.GV = GV;
788  StubAM.GVOpFlags = GVFlags;
789 
790  // Prepare for inserting code in the local-value area.
791  SavePoint SaveInsertPt = enterLocalValueArea();
792 
793  if (TLI.getPointerTy(DL) == MVT::i64) {
794  Opc = X86::MOV64rm;
795  RC = &X86::GR64RegClass;
796 
797  if (Subtarget->isPICStyleRIPRel())
798  StubAM.Base.Reg = X86::RIP;
799  } else {
800  Opc = X86::MOV32rm;
801  RC = &X86::GR32RegClass;
802  }
803 
804  LoadReg = createResultReg(RC);
805  MachineInstrBuilder LoadMI =
806  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
807  addFullAddress(LoadMI, StubAM);
808 
809  // Ok, back to normal mode.
810  leaveLocalValueArea(SaveInsertPt);
811 
812  // Prevent loading GV stub multiple times in same MBB.
813  LocalValueMap[V] = LoadReg;
814  }
815 
816  // Now construct the final address. Note that the Disp, Scale,
817  // and Index values may already be set here.
818  AM.Base.Reg = LoadReg;
819  AM.GV = nullptr;
820  return true;
821  }
822  }
823 
824  // If all else fails, try to materialize the value in a register.
825  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
826  if (AM.Base.Reg == 0) {
827  AM.Base.Reg = getRegForValue(V);
828  return AM.Base.Reg != 0;
829  }
830  if (AM.IndexReg == 0) {
831  assert(AM.Scale == 1 && "Scale with no index!");
832  AM.IndexReg = getRegForValue(V);
833  return AM.IndexReg != 0;
834  }
835  }
836 
837  return false;
838 }
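// For illustration, a global that needs a stub load (isGlobalStubReference)
// on x86-64 PIC is handled above in two steps, roughly:
//   movq  global@GOTPCREL(%rip), %reg   # load the address from the stub
//   movl  (%reg), %eax                  # the actual access uses it as base
// with placeholder registers; the stub load lives in the local-value area so
// it can be reused within the same block.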
839 
840 /// X86SelectAddress - Attempt to fill in an address from the given value.
841 ///
842 bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
843  SmallVector<const Value *, 32> GEPs;
844 redo_gep:
845  const User *U = nullptr;
846  unsigned Opcode = Instruction::UserOp1;
847  if (const Instruction *I = dyn_cast<Instruction>(V)) {
848  // Don't walk into other basic blocks; it's possible we haven't
849  // visited them yet, so the instructions may not yet be assigned
850  // virtual registers.
851  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
852  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
853  Opcode = I->getOpcode();
854  U = I;
855  }
856  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
857  Opcode = C->getOpcode();
858  U = C;
859  }
860 
861  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
862  if (Ty->getAddressSpace() > 255)
863  // Fast instruction selection doesn't support the special
864  // address spaces.
865  return false;
866 
867  switch (Opcode) {
868  default: break;
869  case Instruction::BitCast:
870  // Look past bitcasts.
871  return X86SelectAddress(U->getOperand(0), AM);
872 
873  case Instruction::IntToPtr:
874  // Look past no-op inttoptrs.
875  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
876  TLI.getPointerTy(DL))
877  return X86SelectAddress(U->getOperand(0), AM);
878  break;
879 
880  case Instruction::PtrToInt:
881  // Look past no-op ptrtoints.
882  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
883  return X86SelectAddress(U->getOperand(0), AM);
884  break;
885 
886  case Instruction::Alloca: {
887  // Do static allocas.
888  const AllocaInst *A = cast<AllocaInst>(V);
889  DenseMap<const AllocaInst *, int>::iterator SI =
890  FuncInfo.StaticAllocaMap.find(A);
891  if (SI != FuncInfo.StaticAllocaMap.end()) {
892  AM.BaseType = X86AddressMode::FrameIndexBase;
893  AM.Base.FrameIndex = SI->second;
894  return true;
895  }
896  break;
897  }
898 
899  case Instruction::Add: {
900  // Adds of constants are common and easy enough.
901  if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
902  uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
903  // They have to fit in the 32-bit signed displacement field though.
904  if (isInt<32>(Disp)) {
905  AM.Disp = (uint32_t)Disp;
906  return X86SelectAddress(U->getOperand(0), AM);
907  }
908  }
909  break;
910  }
911 
912  case Instruction::GetElementPtr: {
913  X86AddressMode SavedAM = AM;
914 
915  // Pattern-match simple GEPs.
916  uint64_t Disp = (int32_t)AM.Disp;
917  unsigned IndexReg = AM.IndexReg;
918  unsigned Scale = AM.Scale;
919  gep_type_iterator GTI = gep_type_begin(U);
920  // Iterate through the indices, folding what we can. Constants can be
921  // folded, and one dynamic index can be handled, if the scale is supported.
922  for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
923  i != e; ++i, ++GTI) {
924  const Value *Op = *i;
925  if (StructType *STy = GTI.getStructTypeOrNull()) {
926  const StructLayout *SL = DL.getStructLayout(STy);
927  Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
928  continue;
929  }
930 
931  // An array/variable index is always of the form i*S where S is the
932  // constant scale size. See if we can push the scale into immediates.
933  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
934  for (;;) {
935  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
936  // Constant-offset addressing.
937  Disp += CI->getSExtValue() * S;
938  break;
939  }
940  if (canFoldAddIntoGEP(U, Op)) {
941  // A compatible add with a constant operand. Fold the constant.
942  ConstantInt *CI =
943  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
944  Disp += CI->getSExtValue() * S;
945  // Iterate on the other operand.
946  Op = cast<AddOperator>(Op)->getOperand(0);
947  continue;
948  }
949  if (IndexReg == 0 &&
950  (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
951  (S == 1 || S == 2 || S == 4 || S == 8)) {
952  // Scaled-index addressing.
953  Scale = S;
954  IndexReg = getRegForGEPIndex(Op).first;
955  if (IndexReg == 0)
956  return false;
957  break;
958  }
959  // Unsupported.
960  goto unsupported_gep;
961  }
962  }
963 
964  // Check for displacement overflow.
965  if (!isInt<32>(Disp))
966  break;
967 
968  AM.IndexReg = IndexReg;
969  AM.Scale = Scale;
970  AM.Disp = (uint32_t)Disp;
971  GEPs.push_back(V);
972 
973  if (const GetElementPtrInst *GEP =
974  dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
975  // Ok, the GEP indices were covered by constant-offset and scaled-index
976  // addressing. Update the address state and move on to examining the base.
977  V = GEP;
978  goto redo_gep;
979  } else if (X86SelectAddress(U->getOperand(0), AM)) {
980  return true;
981  }
982 
983  // If we couldn't merge the gep value into this addr mode, revert back to
984  // our address and just match the value instead of completely failing.
985  AM = SavedAM;
986 
987  for (const Value *I : reverse(GEPs))
988  if (handleConstantAddresses(I, AM))
989  return true;
990 
991  return false;
992  unsupported_gep:
993  // Ok, the GEP indices weren't all covered.
994  break;
995  }
996  }
997 
998  return handleConstantAddresses(V, AM);
999 }
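// For illustration (placeholder names), the GEP handling above lets
//   %p = getelementptr i32, i32* %base, i64 %i
//   %v = load i32, i32* %p
// fold into a single addressing mode, e.g. "movl (%base,%i,4), %eax" with
// scale 4 and no separate address arithmetic.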
1000 
1001 /// X86SelectCallAddress - Attempt to fill in an address from the given value.
1002 ///
1003 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
1004  const User *U = nullptr;
1005  unsigned Opcode = Instruction::UserOp1;
1006  const Instruction *I = dyn_cast<Instruction>(V);
1007  // Record if the value is defined in the same basic block.
1008  //
1009  // This information is crucial to know whether or not folding an
1010  // operand is valid.
1011  // Indeed, FastISel generates or reuses a virtual register for all
1012  // operands of all instructions it selects. Obviously, the definition and
1013  // its uses must use the same virtual register otherwise the produced
1014  // code is incorrect.
1015  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
1016  // registers for values that are alive across basic blocks. This ensures
1017  // that the values are consistently set across basic blocks, even
1018  // if different instruction selection mechanisms are used (e.g., a mix of
1019  // SDISel and FastISel).
1020  // For values local to a basic block, the instruction selection process
1021  // generates these virtual registers with whatever method is appropriate
1022  // for its needs. In particular, FastISel and SDISel do not share the way
1023  // local virtual registers are set.
1024  // Therefore, it is impossible (or at least unsafe) to share values
1025  // between basic blocks unless they use the same instruction selection
1026  // method, which is not guaranteed for X86.
1027  // Moreover, things like hasOneUse could not be used accurately if we
1028  // allowed referencing values across basic blocks when they are not
1029  // actually alive across basic blocks.
1030  bool InMBB = true;
1031  if (I) {
1032  Opcode = I->getOpcode();
1033  U = I;
1034  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
1035  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
1036  Opcode = C->getOpcode();
1037  U = C;
1038  }
1039 
1040  switch (Opcode) {
1041  default: break;
1042  case Instruction::BitCast:
1043  // Look past bitcasts if its operand is in the same BB.
1044  if (InMBB)
1045  return X86SelectCallAddress(U->getOperand(0), AM);
1046  break;
1047 
1048  case Instruction::IntToPtr:
1049  // Look past no-op inttoptrs if its operand is in the same BB.
1050  if (InMBB &&
1051  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
1052  TLI.getPointerTy(DL))
1053  return X86SelectCallAddress(U->getOperand(0), AM);
1054  break;
1055 
1056  case Instruction::PtrToInt:
1057  // Look past no-op ptrtoints if its operand is in the same BB.
1058  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
1059  return X86SelectCallAddress(U->getOperand(0), AM);
1060  break;
1061  }
1062 
1063  // Handle constant address.
1064  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
1065  // Can't handle alternate code models yet.
1066  if (TM.getCodeModel() != CodeModel::Small)
1067  return false;
1068 
1069  // RIP-relative addresses can't have additional register operands.
1070  if (Subtarget->isPICStyleRIPRel() &&
1071  (AM.Base.Reg != 0 || AM.IndexReg != 0))
1072  return false;
1073 
1074  // Can't handle TLS.
1075  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
1076  if (GVar->isThreadLocal())
1077  return false;
1078 
1079  // Okay, we've committed to selecting this global. Set up the basic address.
1080  AM.GV = GV;
1081 
1082  // Return a direct reference to the global. Fastisel can handle calls to
1083  // functions that require loads, such as dllimport and nonlazybind
1084  // functions.
1085  if (Subtarget->isPICStyleRIPRel()) {
1086  // Use rip-relative addressing if we can. Above we verified that the
1087  // base and index registers are unused.
1088  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
1089  AM.Base.Reg = X86::RIP;
1090  } else {
1091  AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
1092  }
1093 
1094  return true;
1095  }
1096 
1097  // If all else fails, try to materialize the value in a register.
1098  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
1099  if (AM.Base.Reg == 0) {
1100  AM.Base.Reg = getRegForValue(V);
1101  return AM.Base.Reg != 0;
1102  }
1103  if (AM.IndexReg == 0) {
1104  assert(AM.Scale == 1 && "Scale with no index!");
1105  AM.IndexReg = getRegForValue(V);
1106  return AM.IndexReg != 0;
1107  }
1108  }
1109 
1110  return false;
1111 }
1112 
1113 
1114 /// X86SelectStore - Select and emit code to implement store instructions.
1115 bool X86FastISel::X86SelectStore(const Instruction *I) {
1116  // Atomic stores need special handling.
1117  const StoreInst *S = cast<StoreInst>(I);
1118 
1119  if (S->isAtomic())
1120  return false;
1121 
1122  const Value *PtrV = I->getOperand(1);
1123  if (TLI.supportSwiftError()) {
1124  // Swifterror values can come from either a function parameter with
1125  // swifterror attribute or an alloca with swifterror attribute.
1126  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
1127  if (Arg->hasSwiftErrorAttr())
1128  return false;
1129  }
1130 
1131  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
1132  if (Alloca->isSwiftError())
1133  return false;
1134  }
1135  }
1136 
1137  const Value *Val = S->getValueOperand();
1138  const Value *Ptr = S->getPointerOperand();
1139 
1140  MVT VT;
1141  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
1142  return false;
1143 
1144  unsigned Alignment = S->getAlignment();
1145  unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
1146  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1147  Alignment = ABIAlignment;
1148  bool Aligned = Alignment >= ABIAlignment;
1149 
1150  X86AddressMode AM;
1151  if (!X86SelectAddress(Ptr, AM))
1152  return false;
1153 
1154  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
1155 }
1156 
1157 /// X86SelectRet - Select and emit code to implement ret instructions.
1158 bool X86FastISel::X86SelectRet(const Instruction *I) {
1159  const ReturnInst *Ret = cast<ReturnInst>(I);
1160  const Function &F = *I->getParent()->getParent();
1161  const X86MachineFunctionInfo *X86MFInfo =
1162  FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
1163 
1164  if (!FuncInfo.CanLowerReturn)
1165  return false;
1166 
1167  if (TLI.supportSwiftError() &&
1168  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
1169  return false;
1170 
1171  if (TLI.supportSplitCSR(FuncInfo.MF))
1172  return false;
1173 
1174  CallingConv::ID CC = F.getCallingConv();
1175  if (CC != CallingConv::C &&
1176  CC != CallingConv::Fast &&
1177  CC != CallingConv::X86_FastCall &&
1178  CC != CallingConv::X86_StdCall &&
1179  CC != CallingConv::X86_ThisCall &&
1180  CC != CallingConv::X86_64_SysV &&
1181  CC != CallingConv::Win64)
1182  return false;
1183 
1184  // Don't handle popping bytes if they don't fit the ret's immediate.
1185  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
1186  return false;
1187 
1188  // fastcc with -tailcallopt is intended to provide a guaranteed
1189  // tail call optimization. Fastisel doesn't know how to do that.
1190  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
1191  return false;
1192 
1193  // Let SDISel handle vararg functions.
1194  if (F.isVarArg())
1195  return false;
1196 
1197  // Build a list of return value registers.
1198  SmallVector<unsigned, 4> RetRegs;
1199 
1200  if (Ret->getNumOperands() > 0) {
1201  SmallVector<ISD::OutputArg, 4> Outs;
1202  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1203 
1204  // Analyze operands of the call, assigning locations to each operand.
1205  SmallVector<CCValAssign, 16> ValLocs;
1206  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
1207  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
1208 
1209  const Value *RV = Ret->getOperand(0);
1210  unsigned Reg = getRegForValue(RV);
1211  if (Reg == 0)
1212  return false;
1213 
1214  // Only handle a single return value for now.
1215  if (ValLocs.size() != 1)
1216  return false;
1217 
1218  CCValAssign &VA = ValLocs[0];
1219 
1220  // Don't bother handling odd stuff for now.
1221  if (VA.getLocInfo() != CCValAssign::Full)
1222  return false;
1223  // Only handle register returns for now.
1224  if (!VA.isRegLoc())
1225  return false;
1226 
1227  // The calling-convention tables for x87 returns don't tell
1228  // the whole story.
1229  if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
1230  return false;
1231 
1232  unsigned SrcReg = Reg + VA.getValNo();
1233  EVT SrcVT = TLI.getValueType(DL, RV->getType());
1234  EVT DstVT = VA.getValVT();
1235  // Special handling for extended integers.
1236  if (SrcVT != DstVT) {
1237  if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
1238  return false;
1239 
1240  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1241  return false;
1242 
1243  assert(DstVT == MVT::i32 && "X86 should always ext to i32");
1244 
1245  if (SrcVT == MVT::i1) {
1246  if (Outs[0].Flags.isSExt())
1247  return false;
1248  SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
1249  SrcVT = MVT::i8;
1250  }
1251  unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
1252  ISD::SIGN_EXTEND;
1253  SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
1254  SrcReg, /*TODO: Kill=*/false);
1255  }
1256 
1257  // Make the copy.
1258  unsigned DstReg = VA.getLocReg();
1259  const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
1260  // Avoid a cross-class copy. This is very unlikely.
1261  if (!SrcRC->contains(DstReg))
1262  return false;
1263  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1264  TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
1265 
1266  // Add register to return instruction.
1267  RetRegs.push_back(VA.getLocReg());
1268  }
1269 
1270  // Swift calling convention does not require we copy the sret argument
1271  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
1272 
1273  // All x86 ABIs require that for returning structs by value we copy
1274  // the sret argument into %rax/%eax (depending on ABI) for the return.
1275  // We saved the argument into a virtual register in the entry block,
1276  // so now we copy the value out and into %rax/%eax.
1277  if (F.hasStructRetAttr() && CC != CallingConv::Swift) {
1278  unsigned Reg = X86MFInfo->getSRetReturnReg();
1279  assert(Reg &&
1280  "SRetReturnReg should have been set in LowerFormalArguments()!");
1281  unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
1282  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1283  TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
1284  RetRegs.push_back(RetReg);
1285  }
1286 
1287  // Now emit the RET.
1288  MachineInstrBuilder MIB;
1289  if (X86MFInfo->getBytesToPopOnReturn()) {
1290  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1291  TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
1292  .addImm(X86MFInfo->getBytesToPopOnReturn());
1293  } else {
1294  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1295  TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
1296  }
1297  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1298  MIB.addReg(RetRegs[i], RegState::Implicit);
1299  return true;
1300 }
1301 
1302 /// X86SelectLoad - Select and emit code to implement load instructions.
1303 ///
1304 bool X86FastISel::X86SelectLoad(const Instruction *I) {
1305  const LoadInst *LI = cast<LoadInst>(I);
1306 
1307  // Atomic loads need special handling.
1308  if (LI->isAtomic())
1309  return false;
1310 
1311  const Value *SV = I->getOperand(0);
1312  if (TLI.supportSwiftError()) {
1313  // Swifterror values can come from either a function parameter with
1314  // swifterror attribute or an alloca with swifterror attribute.
1315  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1316  if (Arg->hasSwiftErrorAttr())
1317  return false;
1318  }
1319 
1320  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1321  if (Alloca->isSwiftError())
1322  return false;
1323  }
1324  }
1325 
1326  MVT VT;
1327  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
1328  return false;
1329 
1330  const Value *Ptr = LI->getPointerOperand();
1331 
1332  X86AddressMode AM;
1333  if (!X86SelectAddress(Ptr, AM))
1334  return false;
1335 
1336  unsigned Alignment = LI->getAlignment();
1337  unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
1338  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1339  Alignment = ABIAlignment;
1340 
1341  unsigned ResultReg = 0;
1342  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
1343  Alignment))
1344  return false;
1345 
1346  updateValueMap(I, ResultReg);
1347  return true;
1348 }
1349 
1350 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
1351  bool HasAVX512 = Subtarget->hasAVX512();
1352  bool HasAVX = Subtarget->hasAVX();
1353  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
1354  bool X86ScalarSSEf64 = Subtarget->hasSSE2();
1355 
1356  switch (VT.getSimpleVT().SimpleTy) {
1357  default: return 0;
1358  case MVT::i8: return X86::CMP8rr;
1359  case MVT::i16: return X86::CMP16rr;
1360  case MVT::i32: return X86::CMP32rr;
1361  case MVT::i64: return X86::CMP64rr;
1362  case MVT::f32:
1363  return X86ScalarSSEf32
1364  ? (HasAVX512 ? X86::VUCOMISSZrr
1365  : HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr)
1366  : 0;
1367  case MVT::f64:
1368  return X86ScalarSSEf64
1369  ? (HasAVX512 ? X86::VUCOMISDZrr
1370  : HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr)
1371  : 0;
1372  }
1373 }
1374 
1375 /// If the right-hand side of the comparison is the constant RHSC, return an
1376 /// opcode that can fold it as an immediate (e.g. CMP32ri); otherwise return 0.
1377 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
1378  int64_t Val = RHSC->getSExtValue();
1379  switch (VT.getSimpleVT().SimpleTy) {
1380  // Otherwise, we can't fold the immediate into this comparison.
1381  default:
1382  return 0;
1383  case MVT::i8:
1384  return X86::CMP8ri;
1385  case MVT::i16:
1386  if (isInt<8>(Val))
1387  return X86::CMP16ri8;
1388  return X86::CMP16ri;
1389  case MVT::i32:
1390  if (isInt<8>(Val))
1391  return X86::CMP32ri8;
1392  return X86::CMP32ri;
1393  case MVT::i64:
1394  if (isInt<8>(Val))
1395  return X86::CMP64ri8;
1396  // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
1397  // field.
1398  if (isInt<32>(Val))
1399  return X86::CMP64ri32;
1400  return 0;
1401  }
1402 }
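// For illustration, "icmp slt i32 %x, 5" can use CMP32ri8 ("cmpl $5, %reg",
// placeholder register) because 5 fits in a sign-extended 8-bit immediate,
// while a 64-bit compare against a constant that does not fit in 32 bits
// returns 0 above and falls back to a register-register compare.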
1403 
1404 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1405  const DebugLoc &CurDbgLoc) {
1406  unsigned Op0Reg = getRegForValue(Op0);
1407  if (Op0Reg == 0) return false;
1408 
1409  // Handle 'null' like i32/i64 0.
1410  if (isa<ConstantPointerNull>(Op1))
1411  Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
1412 
1413  // We have two options: compare with register or immediate. If the RHS of
1414  // the compare is an immediate that we can fold into this compare, use
1415  // CMPri, otherwise use CMPrr.
1416  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1417  if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1418  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
1419  .addReg(Op0Reg)
1420  .addImm(Op1C->getSExtValue());
1421  return true;
1422  }
1423  }
1424 
1425  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
1426  if (CompareOpc == 0) return false;
1427 
1428  unsigned Op1Reg = getRegForValue(Op1);
1429  if (Op1Reg == 0) return false;
1430  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
1431  .addReg(Op0Reg)
1432  .addReg(Op1Reg);
1433 
1434  return true;
1435 }
1436 
1437 bool X86FastISel::X86SelectCmp(const Instruction *I) {
1438  const CmpInst *CI = cast<CmpInst>(I);
1439 
1440  MVT VT;
1441  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
1442  return false;
1443 
1444  // Try to optimize or fold the cmp.
1445  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1446  unsigned ResultReg = 0;
1447  switch (Predicate) {
1448  default: break;
1449  case CmpInst::FCMP_FALSE: {
1450  ResultReg = createResultReg(&X86::GR32RegClass);
1451  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
1452  ResultReg);
1453  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
1454  X86::sub_8bit);
1455  if (!ResultReg)
1456  return false;
1457  break;
1458  }
1459  case CmpInst::FCMP_TRUE: {
1460  ResultReg = createResultReg(&X86::GR8RegClass);
1461  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
1462  ResultReg).addImm(1);
1463  break;
1464  }
1465  }
1466 
1467  if (ResultReg) {
1468  updateValueMap(I, ResultReg);
1469  return true;
1470  }
1471 
1472  const Value *LHS = CI->getOperand(0);
1473  const Value *RHS = CI->getOperand(1);
1474 
1475  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1476  // We don't have to materialize a zero constant for this case and can just use
1477  // %x again on the RHS.
1478  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1479  const auto *RHSC = dyn_cast<ConstantFP>(RHS);
1480  if (RHSC && RHSC->isNullValue())
1481  RHS = LHS;
1482  }
1483 
1484  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
1485  static const uint16_t SETFOpcTable[2][3] = {
1486  { X86::SETEr, X86::SETNPr, X86::AND8rr },
1487  { X86::SETNEr, X86::SETPr, X86::OR8rr }
1488  };
1489  const uint16_t *SETFOpc = nullptr;
1490  switch (Predicate) {
1491  default: break;
1492  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
1493  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
1494  }
1495 
1496  ResultReg = createResultReg(&X86::GR8RegClass);
1497  if (SETFOpc) {
1498  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1499  return false;
1500 
1501  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
1502  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
1503  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
1504  FlagReg1);
1505  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
1506  FlagReg2);
1507  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
1508  ResultReg).addReg(FlagReg1).addReg(FlagReg2);
1509  updateValueMap(I, ResultReg);
1510  return true;
1511  }
1512 
1513  X86::CondCode CC;
1514  bool SwapArgs;
1515  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1516  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1517  unsigned Opc = X86::getSETFromCond(CC);
1518 
1519  if (SwapArgs)
1520  std::swap(LHS, RHS);
1521 
1522  // Emit a compare of LHS/RHS.
1523  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1524  return false;
1525 
1526  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
1527  updateValueMap(I, ResultReg);
1528  return true;
1529 }
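// For illustration, FCMP_OEQ above needs three instructions because
// "ordered and equal" requires ZF = 1 and PF = 0 after UCOMISS/UCOMISD:
//   sete  %al        # SETEr
//   setnp %cl        # SETNPr
//   andb  %cl, %al   # AND8rr
// (placeholder registers; FCMP_UNE is the dual using SETNE/SETP/OR8rr).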
1530 
1531 bool X86FastISel::X86SelectZExt(const Instruction *I) {
1532  EVT DstVT = TLI.getValueType(DL, I->getType());
1533  if (!TLI.isTypeLegal(DstVT))
1534  return false;
1535 
1536  unsigned ResultReg = getRegForValue(I->getOperand(0));
1537  if (ResultReg == 0)
1538  return false;
1539 
1540  // Handle zero-extension from i1 to i8, which is common.
1541  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1542  if (SrcVT == MVT::i1) {
1543  // Set the high bits to zero.
1544  ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
1545  SrcVT = MVT::i8;
1546 
1547  if (ResultReg == 0)
1548  return false;
1549  }
1550 
1551  if (DstVT == MVT::i64) {
1552  // Handle extension to 64-bits via sub-register shenanigans.
1553  unsigned MovInst;
1554 
1555  switch (SrcVT.SimpleTy) {
1556  case MVT::i8: MovInst = X86::MOVZX32rr8; break;
1557  case MVT::i16: MovInst = X86::MOVZX32rr16; break;
1558  case MVT::i32: MovInst = X86::MOV32rr; break;
1559  default: llvm_unreachable("Unexpected zext to i64 source type");
1560  }
1561 
1562  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1563  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
1564  .addReg(ResultReg);
1565 
1566  ResultReg = createResultReg(&X86::GR64RegClass);
1567  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
1568  ResultReg)
1569  .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
1570  } else if (DstVT == MVT::i16) {
1571  // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
1572  // extend to 32-bits and then extract down to 16-bits.
1573  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1574  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
1575  Result32).addReg(ResultReg);
1576 
1577  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1578  X86::sub_16bit);
1579  } else if (DstVT != MVT::i8) {
1580  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1581  ResultReg, /*Kill=*/true);
1582  if (ResultReg == 0)
1583  return false;
1584  }
1585 
1586  updateValueMap(I, ResultReg);
1587  return true;
1588 }
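// For illustration, "zext i8 %x to i64" takes the sub-register path above:
// MOVZX32rr8 zero-extends into a 32-bit register and SUBREG_TO_REG then
// retags it as a 64-bit value, relying on 32-bit writes implicitly clearing
// the upper 32 bits on x86-64.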
1589 
1590 bool X86FastISel::X86SelectSExt(const Instruction *I) {
1591  EVT DstVT = TLI.getValueType(DL, I->getType());
1592  if (!TLI.isTypeLegal(DstVT))
1593  return false;
1594 
1595  unsigned ResultReg = getRegForValue(I->getOperand(0));
1596  if (ResultReg == 0)
1597  return false;
1598 
1599  // Handle sign-extension from i1 to i8.
1600  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1601  if (SrcVT == MVT::i1) {
1602  // Set the high bits to zero.
1603  unsigned ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg,
1604  /*TODO: Kill=*/false);
1605  if (ZExtReg == 0)
1606  return false;
1607 
1608  // Negate the result to make an 8-bit sign extended value.
1609  ResultReg = createResultReg(&X86::GR8RegClass);
1610  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r),
1611  ResultReg).addReg(ZExtReg);
1612 
1613  SrcVT = MVT::i8;
1614  }
1615 
1616  if (DstVT == MVT::i16) {
1617  // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
1618  // extend to 32-bits and then extract down to 16-bits.
1619  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1620  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
1621  Result32).addReg(ResultReg);
1622 
1623  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1624  X86::sub_16bit);
1625  } else if (DstVT != MVT::i8) {
1626  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
1627  ResultReg, /*Kill=*/true);
1628  if (ResultReg == 0)
1629  return false;
1630  }
1631 
1632  updateValueMap(I, ResultReg);
1633  return true;
1634 }
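// For illustration, "sext i1 %b to i32" above first zero-extends the i1 to
// an 8-bit 0/1 value and then negates it (NEG8r), giving 0x00 or 0xFF, i.e.
// an 8-bit 0/-1 that the ordinary i8 sign extension can widen.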
1635 
1636 bool X86FastISel::X86SelectBranch(const Instruction *I) {
1637  // Unconditional branches are selected by tablegen-generated code.
1638  // Handle a conditional branch.
1639  const BranchInst *BI = cast<BranchInst>(I);
1640  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1641  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1642 
1643  // Fold the common case of a conditional branch with a comparison
1644  // in the same block (values defined on other blocks may not have
1645  // initialized registers).
1646  X86::CondCode CC;
1647  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1648  if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
1649  EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1650 
1651  // Try to optimize or fold the cmp.
1652  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1653  switch (Predicate) {
1654  default: break;
1655  case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
1656  case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
1657  }
1658 
1659  const Value *CmpLHS = CI->getOperand(0);
1660  const Value *CmpRHS = CI->getOperand(1);
1661 
1662  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
1663  // 0.0.
1664  // We don't have to materialize a zero constant for this case and can just
1665  // use %x again on the RHS.
1666  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1667  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1668  if (CmpRHSC && CmpRHSC->isNullValue())
1669  CmpRHS = CmpLHS;
1670  }
1671 
1672  // Try to take advantage of fallthrough opportunities.
1673  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1674  std::swap(TrueMBB, FalseMBB);
1675  Predicate = CmpInst::getInversePredicate(Predicate);
1676  }
1677 
1678  // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
1679  // code check. Instead two branch instructions are required to check all
1680  // the flags. First we change the predicate to a supported condition code,
1681  which will be the first branch. Later on we will emit the second
1682  // branch.
1683  bool NeedExtraBranch = false;
1684  switch (Predicate) {
1685  default: break;
1686  case CmpInst::FCMP_OEQ:
1687  std::swap(TrueMBB, FalseMBB);
1688  LLVM_FALLTHROUGH;
1689  case CmpInst::FCMP_UNE:
1690  NeedExtraBranch = true;
1691  Predicate = CmpInst::FCMP_ONE;
1692  break;
1693  }
1694 
1695  bool SwapArgs;
1696  unsigned BranchOpc;
1697  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1698  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1699 
1700  BranchOpc = X86::GetCondBranchFromCond(CC);
1701  if (SwapArgs)
1702  std::swap(CmpLHS, CmpRHS);
1703 
1704  // Emit a compare of the LHS and RHS, setting the flags.
1705  if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
1706  return false;
1707 
1708  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
1709  .addMBB(TrueMBB);
1710 
1711  // X86 requires a second branch to handle UNE (and OEQ, which is mapped
1712  // to UNE above).
1713  if (NeedExtraBranch) {
1714  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1))
1715  .addMBB(TrueMBB);
1716  }
1717 
1718  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1719  return true;
1720  }
1721  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1722  // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1723  // typically happen for _Bool and C++ bools.
1724  MVT SourceVT;
1725  if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1726  isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1727  unsigned TestOpc = 0;
1728  switch (SourceVT.SimpleTy) {
1729  default: break;
1730  case MVT::i8: TestOpc = X86::TEST8ri; break;
1731  case MVT::i16: TestOpc = X86::TEST16ri; break;
1732  case MVT::i32: TestOpc = X86::TEST32ri; break;
1733  case MVT::i64: TestOpc = X86::TEST64ri32; break;
1734  }
1735  if (TestOpc) {
1736  unsigned OpReg = getRegForValue(TI->getOperand(0));
1737  if (OpReg == 0) return false;
1738 
1739  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
1740  .addReg(OpReg).addImm(1);
1741 
1742  unsigned JmpOpc = X86::JNE_1;
1743  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1744  std::swap(TrueMBB, FalseMBB);
1745  JmpOpc = X86::JE_1;
1746  }
1747 
1748  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
1749  .addMBB(TrueMBB);
1750 
1751  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1752  return true;
1753  }
1754  }
1755  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
1756  // Fake request the condition, otherwise the intrinsic might be completely
1757  // optimized away.
1758  unsigned TmpReg = getRegForValue(BI->getCondition());
1759  if (TmpReg == 0)
1760  return false;
1761 
1762  unsigned BranchOpc = X86::GetCondBranchFromCond(CC);
1763 
1764  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
1765  .addMBB(TrueMBB);
1766  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1767  return true;
1768  }
1769 
1770  // Otherwise do a clumsy setcc and re-test it.
1771  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1772  // in an explicit cast, so make sure to handle that correctly.
1773  unsigned OpReg = getRegForValue(BI->getCondition());
1774  if (OpReg == 0) return false;
1775 
1776  // In case OpReg is a K register, COPY to a GPR
1777  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1778  unsigned KOpReg = OpReg;
1779  OpReg = createResultReg(&X86::GR32RegClass);
1780  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1781  TII.get(TargetOpcode::COPY), OpReg)
1782  .addReg(KOpReg);
1783  OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
1784  X86::sub_8bit);
1785  }
1786  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1787  .addReg(OpReg)
1788  .addImm(1);
1789  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
1790  .addMBB(TrueMBB);
1791  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1792  return true;
1793 }
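// --- Illustrative sketch (editor's addition, not part of X86FastISel.cpp) ---
// FCMP_UNE (and FCMP_OEQ after swapping the targets) needs two conditional
// branches because no single x86 condition code expresses "not equal OR
// unordered". A value-level model of what the JNE_1 + JP_1 pair tests,
// assuming IEEE-754 semantics via std::isunordered:
#include <cmath>

bool fcmp_une(double A, double B) {
  bool OrderedNE = (A < B) || (A > B);      // first branch: JNE after the compare
  bool Unordered = std::isunordered(A, B);  // second branch: JP (parity flag set)
  return OrderedNE || Unordered;            // either branch taken -> TrueMBB
}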
1794 
1795 bool X86FastISel::X86SelectShift(const Instruction *I) {
1796  unsigned CReg = 0, OpReg = 0;
1797  const TargetRegisterClass *RC = nullptr;
1798  if (I->getType()->isIntegerTy(8)) {
1799  CReg = X86::CL;
1800  RC = &X86::GR8RegClass;
1801  switch (I->getOpcode()) {
1802  case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1803  case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1804  case Instruction::Shl: OpReg = X86::SHL8rCL; break;
1805  default: return false;
1806  }
1807  } else if (I->getType()->isIntegerTy(16)) {
1808  CReg = X86::CX;
1809  RC = &X86::GR16RegClass;
1810  switch (I->getOpcode()) {
1811  default: llvm_unreachable("Unexpected shift opcode");
1812  case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1813  case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1814  case Instruction::Shl: OpReg = X86::SHL16rCL; break;
1815  }
1816  } else if (I->getType()->isIntegerTy(32)) {
1817  CReg = X86::ECX;
1818  RC = &X86::GR32RegClass;
1819  switch (I->getOpcode()) {
1820  default: llvm_unreachable("Unexpected shift opcode");
1821  case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1822  case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1823  case Instruction::Shl: OpReg = X86::SHL32rCL; break;
1824  }
1825  } else if (I->getType()->isIntegerTy(64)) {
1826  CReg = X86::RCX;
1827  RC = &X86::GR64RegClass;
1828  switch (I->getOpcode()) {
1829  default: llvm_unreachable("Unexpected shift opcode");
1830  case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1831  case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1832  case Instruction::Shl: OpReg = X86::SHL64rCL; break;
1833  }
1834  } else {
1835  return false;
1836  }
1837 
1838  MVT VT;
1839  if (!isTypeLegal(I->getType(), VT))
1840  return false;
1841 
1842  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1843  if (Op0Reg == 0) return false;
1844 
1845  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1846  if (Op1Reg == 0) return false;
1847  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1848  CReg).addReg(Op1Reg);
1849 
1850  // The shift instruction uses X86::CL. If we defined a super-register
1851  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1852  if (CReg != X86::CL)
1853  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1854  TII.get(TargetOpcode::KILL), X86::CL)
1855  .addReg(CReg, RegState::Kill);
1856 
1857  unsigned ResultReg = createResultReg(RC);
1858  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
1859  .addReg(Op0Reg);
1860  updateValueMap(I, ResultReg);
1861  return true;
1862 }
1863 
1864 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1865  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1866  const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1867  const static bool S = true; // IsSigned
1868  const static bool U = false; // !IsSigned
1869  const static unsigned Copy = TargetOpcode::COPY;
1870  // For the X86 DIV/IDIV instruction, in most cases the dividend
1871  // (numerator) must be in a specific register pair highreg:lowreg,
1872  // producing the quotient in lowreg and the remainder in highreg.
1873  // For most data types, to set up the instruction, the dividend is
1874  // copied into lowreg, and lowreg is sign-extended or zero-extended
1875  // into highreg. The exception is i8, where the dividend is defined
1876  // as a single register rather than a register pair, and we
1877  // therefore directly sign-extend or zero-extend the dividend into
1878  // lowreg, instead of copying, and ignore the highreg.
1879  const static struct DivRemEntry {
1880  // The following portion depends only on the data type.
1881  const TargetRegisterClass *RC;
1882  unsigned LowInReg; // low part of the register pair
1883  unsigned HighInReg; // high part of the register pair
1884  // The following portion depends on both the data type and the operation.
1885  struct DivRemResult {
1886  unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1887  unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1888  // highreg, or copying a zero into highreg.
1889  unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1890  // zero/sign-extending into lowreg for i8.
1891  unsigned DivRemResultReg; // Register containing the desired result.
1892  bool IsOpSigned; // Whether to use signed or unsigned form.
1893  } ResultTable[NumOps];
1894  } OpTable[NumTypes] = {
1895  { &X86::GR8RegClass, X86::AX, 0, {
1896  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1897  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1898  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1899  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1900  }
1901  }, // i8
1902  { &X86::GR16RegClass, X86::AX, X86::DX, {
1903  { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1904  { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1905  { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
1906  { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
1907  }
1908  }, // i16
1909  { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1910  { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1911  { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1912  { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1913  { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1914  }
1915  }, // i32
1916  { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1917  { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1918  { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1919  { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
1920  { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
1921  }
1922  }, // i64
1923  };
1924 
1925  MVT VT;
1926  if (!isTypeLegal(I->getType(), VT))
1927  return false;
1928 
1929  unsigned TypeIndex, OpIndex;
1930  switch (VT.SimpleTy) {
1931  default: return false;
1932  case MVT::i8: TypeIndex = 0; break;
1933  case MVT::i16: TypeIndex = 1; break;
1934  case MVT::i32: TypeIndex = 2; break;
1935  case MVT::i64: TypeIndex = 3;
1936  if (!Subtarget->is64Bit())
1937  return false;
1938  break;
1939  }
1940 
1941  switch (I->getOpcode()) {
1942  default: llvm_unreachable("Unexpected div/rem opcode");
1943  case Instruction::SDiv: OpIndex = 0; break;
1944  case Instruction::SRem: OpIndex = 1; break;
1945  case Instruction::UDiv: OpIndex = 2; break;
1946  case Instruction::URem: OpIndex = 3; break;
1947  }
1948 
1949  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1950  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1951  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1952  if (Op0Reg == 0)
1953  return false;
1954  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1955  if (Op1Reg == 0)
1956  return false;
1957 
1958  // Move op0 into low-order input register.
1959  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1960  TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1961  // Zero-extend or sign-extend into high-order input register.
1962  if (OpEntry.OpSignExtend) {
1963  if (OpEntry.IsOpSigned)
1964  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1965  TII.get(OpEntry.OpSignExtend));
1966  else {
1967  unsigned Zero32 = createResultReg(&X86::GR32RegClass);
1968  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1969  TII.get(X86::MOV32r0), Zero32);
1970 
1971  // Copy the zero into the appropriate sub/super/identical physical
1972  // register. Unfortunately the operations needed are not uniform enough
1973  // to fit neatly into the table above.
1974  if (VT == MVT::i16) {
1975  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1976  TII.get(Copy), TypeEntry.HighInReg)
1977  .addReg(Zero32, 0, X86::sub_16bit);
1978  } else if (VT == MVT::i32) {
1979  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1980  TII.get(Copy), TypeEntry.HighInReg)
1981  .addReg(Zero32);
1982  } else if (VT == MVT::i64) {
1983  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1984  TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1985  .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1986  }
1987  }
1988  }
1989  // Generate the DIV/IDIV instruction.
1990  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1991  TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1992  // For i8 remainder, we can't reference ah directly, as we'll end
1993  // up with bogus copies like %r9b = COPY %ah. Reference ax
1994  // instead to prevent ah references in a rex instruction.
1995  //
1996  // The current assumption of the fast register allocator is that isel
1997  // won't generate explicit references to the GR8_NOREX registers. If
1998  // the allocator and/or the backend get enhanced to be more robust in
1999  // that regard, this can be, and should be, removed.
2000  unsigned ResultReg = 0;
2001  if ((I->getOpcode() == Instruction::SRem ||
2002  I->getOpcode() == Instruction::URem) &&
2003  OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
2004  unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
2005  unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
2006  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2007  TII.get(Copy), SourceSuperReg).addReg(X86::AX);
2008 
2009  // Shift AX right by 8 bits instead of using AH.
2010  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
2011  ResultSuperReg).addReg(SourceSuperReg).addImm(8);
2012 
2013  // Now reference the 8-bit subreg of the result.
2014  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
2015  /*Kill=*/true, X86::sub_8bit);
2016  }
2017  // Copy the result out of the physreg if we haven't already.
2018  if (!ResultReg) {
2019  ResultReg = createResultReg(TypeEntry.RC);
2020  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
2021  .addReg(OpEntry.DivRemResultReg);
2022  }
2023  updateValueMap(I, ResultReg);
2024 
2025  return true;
2026 }
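// --- Illustrative sketch (editor's addition, not part of X86FastISel.cpp) ---
// For the i8 remainder, DIV8r leaves the quotient in AL and the remainder in
// AH, but AH cannot be addressed in an instruction carrying a REX prefix, so
// the code above copies AX and shifts right by 8 instead. A value-level model
// of that workaround (unsigned case only, divisor assumed non-zero):
#include <cstdint>

uint8_t urem_i8_via_shift(uint8_t Dividend, uint8_t Divisor) {
  uint16_t AX = Dividend;                               // MOVZX16rr8 into AX
  uint16_t AfterDiv = static_cast<uint16_t>(
      ((AX % Divisor) << 8) | ((AX / Divisor) & 0xFF)); // AX after DIV8r: AH=rem, AL=quot
  return static_cast<uint8_t>(AfterDiv >> 8);           // SHR16ri by 8, then take sub_8bit
}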
2027 
2028 /// Emit a conditional move instruction (if they are supported) to lower
2029 /// the select.
2030 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
2031  // Check if the subtarget supports these instructions.
2032  if (!Subtarget->hasCMov())
2033  return false;
2034 
2035  // FIXME: Add support for i8.
2036  if (RetVT < MVT::i16 || RetVT > MVT::i64)
2037  return false;
2038 
2039  const Value *Cond = I->getOperand(0);
2040  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2041  bool NeedTest = true;
2042  X86::CondCode CC = X86::COND_NE;
2043 
2044  // Optimize conditions coming from a compare if both instructions are in the
2045  // same basic block (values defined in other basic blocks may not have
2046  // initialized registers).
2047  const auto *CI = dyn_cast<CmpInst>(Cond);
2048  if (CI && (CI->getParent() == I->getParent())) {
2049  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2050 
2051  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
2052  static const uint16_t SETFOpcTable[2][3] = {
2053  { X86::SETNPr, X86::SETEr , X86::TEST8rr },
2054  { X86::SETPr, X86::SETNEr, X86::OR8rr }
2055  };
2056  const uint16_t *SETFOpc = nullptr;
2057  switch (Predicate) {
2058  default: break;
2059  case CmpInst::FCMP_OEQ:
2060  SETFOpc = &SETFOpcTable[0][0];
2061  Predicate = CmpInst::ICMP_NE;
2062  break;
2063  case CmpInst::FCMP_UNE:
2064  SETFOpc = &SETFOpcTable[1][0];
2065  Predicate = CmpInst::ICMP_NE;
2066  break;
2067  }
2068 
2069  bool NeedSwap;
2070  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
2071  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
2072 
2073  const Value *CmpLHS = CI->getOperand(0);
2074  const Value *CmpRHS = CI->getOperand(1);
2075  if (NeedSwap)
2076  std::swap(CmpLHS, CmpRHS);
2077 
2078  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2079  // Emit a compare of the LHS and RHS, setting the flags.
2080  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2081  return false;
2082 
2083  if (SETFOpc) {
2084  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
2085  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
2086  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
2087  FlagReg1);
2088  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
2089  FlagReg2);
2090  auto const &II = TII.get(SETFOpc[2]);
2091  if (II.getNumDefs()) {
2092  unsigned TmpReg = createResultReg(&X86::GR8RegClass);
2093  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
2094  .addReg(FlagReg2).addReg(FlagReg1);
2095  } else {
2096  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2097  .addReg(FlagReg2).addReg(FlagReg1);
2098  }
2099  }
2100  NeedTest = false;
2101  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
2102  // Fake request the condition, otherwise the intrinsic might be completely
2103  // optimized away.
2104  unsigned TmpReg = getRegForValue(Cond);
2105  if (TmpReg == 0)
2106  return false;
2107 
2108  NeedTest = false;
2109  }
2110 
2111  if (NeedTest) {
2112  // Selects operate on i1, but CondReg is 8 bits wide and may contain
2113  // garbage. Only the least significant bit is guaranteed to be
2114  // accurate, so if we read more than the lsb we may see non-zero values
2115  // even though the lsb is zero. Therefore, truncate CondReg to i1 for
2116  // the select. This is achieved by performing a TEST against 1.
2117  unsigned CondReg = getRegForValue(Cond);
2118  if (CondReg == 0)
2119  return false;
2120  bool CondIsKill = hasTrivialKill(Cond);
2121 
2122  // In case OpReg is a K register, COPY to a GPR
2123  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2124  unsigned KCondReg = CondReg;
2125  CondReg = createResultReg(&X86::GR32RegClass);
2126  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2127  TII.get(TargetOpcode::COPY), CondReg)
2128  .addReg(KCondReg, getKillRegState(CondIsKill));
2129  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2130  X86::sub_8bit);
2131  }
2132  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2133  .addReg(CondReg, getKillRegState(CondIsKill))
2134  .addImm(1);
2135  }
2136 
2137  const Value *LHS = I->getOperand(1);
2138  const Value *RHS = I->getOperand(2);
2139 
2140  unsigned RHSReg = getRegForValue(RHS);
2141  bool RHSIsKill = hasTrivialKill(RHS);
2142 
2143  unsigned LHSReg = getRegForValue(LHS);
2144  bool LHSIsKill = hasTrivialKill(LHS);
2145 
2146  if (!LHSReg || !RHSReg)
2147  return false;
2148 
2149  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
2150  unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8);
2151  unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
2152  LHSReg, LHSIsKill);
2153  updateValueMap(I, ResultReg);
2154  return true;
2155 }
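// --- Illustrative sketch (editor's addition, not part of X86FastISel.cpp) ---
// The SETFOpcTable above handles FCMP_OEQ/UNE with two setcc's combined by a
// TEST or OR: after a ucomis, ZF means "equal or unordered" and PF means
// "unordered", so OEQ is ZF && !PF and UNE is !ZF || PF. A flag-level model:
#include <cmath>

bool fcmp_oeq(double A, double B) {
  bool ZF = (A == B) || std::isunordered(A, B); // SETEr reads ZF
  bool NP = !std::isunordered(A, B);            // SETNPr reads !PF
  return ZF && NP;                              // TEST8rr of the two flags feeds the CMOV
}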
2156 
2157 /// Emit SSE or AVX instructions to lower the select.
2158 ///
2159 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
2160 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
2161 /// SSE instructions are available. If AVX is available, try to use a VBLENDV.
2162 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
2163  // Optimize conditions coming from a compare if both instructions are in the
2164  // same basic block (values defined in other basic blocks may not have
2165  // initialized registers).
2166  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
2167  if (!CI || (CI->getParent() != I->getParent()))
2168  return false;
2169 
2170  if (I->getType() != CI->getOperand(0)->getType() ||
2171  !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
2172  (Subtarget->hasSSE2() && RetVT == MVT::f64)))
2173  return false;
2174 
2175  const Value *CmpLHS = CI->getOperand(0);
2176  const Value *CmpRHS = CI->getOperand(1);
2177  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2178 
2179  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
2180  // We don't have to materialize a zero constant for this case and can just use
2181  // %x again on the RHS.
2182  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
2183  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
2184  if (CmpRHSC && CmpRHSC->isNullValue())
2185  CmpRHS = CmpLHS;
2186  }
2187 
2188  unsigned CC;
2189  bool NeedSwap;
2190  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
2191  if (CC > 7 && !Subtarget->hasAVX())
2192  return false;
2193 
2194  if (NeedSwap)
2195  std::swap(CmpLHS, CmpRHS);
2196 
2197  // Choose the SSE instruction sequence based on data type (float or double).
2198  static const uint16_t OpcTable[2][4] = {
2199  { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2200  { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2201  };
2202 
2203  const uint16_t *Opc = nullptr;
2204  switch (RetVT.SimpleTy) {
2205  default: return false;
2206  case MVT::f32: Opc = &OpcTable[0][0]; break;
2207  case MVT::f64: Opc = &OpcTable[1][0]; break;
2208  }
2209 
2210  const Value *LHS = I->getOperand(1);
2211  const Value *RHS = I->getOperand(2);
2212 
2213  unsigned LHSReg = getRegForValue(LHS);
2214  bool LHSIsKill = hasTrivialKill(LHS);
2215 
2216  unsigned RHSReg = getRegForValue(RHS);
2217  bool RHSIsKill = hasTrivialKill(RHS);
2218 
2219  unsigned CmpLHSReg = getRegForValue(CmpLHS);
2220  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
2221 
2222  unsigned CmpRHSReg = getRegForValue(CmpRHS);
2223  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
2224 
2225  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
2226  return false;
2227 
2228  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2229  unsigned ResultReg;
2230 
2231  if (Subtarget->hasAVX512()) {
2232  // If we have AVX512 we can use a mask compare and masked movss/sd.
2233  const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
2234  const TargetRegisterClass *VK1 = &X86::VK1RegClass;
2235 
2236  unsigned CmpOpcode =
2237  (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2238  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
2239  CmpRHSReg, CmpRHSIsKill, CC);
2240 
2241  // Need an IMPLICIT_DEF for the input that is used to generate the upper
2242  // bits of the result register since it's not based on any of the inputs.
2243  unsigned ImplicitDefReg = createResultReg(VR128X);
2244  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2245  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2246 
2247  // Place RHSReg in the passthru of the masked movss/sd operation and put
2248  // LHSReg in the input. The mask input comes from the compare.
2249  unsigned MovOpcode =
2250  (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2251  unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
2252  CmpReg, true, ImplicitDefReg, true,
2253  LHSReg, LHSIsKill);
2254 
2255  ResultReg = createResultReg(RC);
2256  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2257  TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2258 
2259  } else if (Subtarget->hasAVX()) {
2260  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2261 
2262  // If we have AVX, create 1 blendv instead of 3 logic instructions.
2263  // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
2264  // uses XMM0 as the selection register. That may need just as many
2265  // instructions as the AND/ANDN/OR sequence due to register moves, so
2266  // don't bother.
2267  unsigned CmpOpcode =
2268  (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2269  unsigned BlendOpcode =
2270  (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2271 
2272  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
2273  CmpRHSReg, CmpRHSIsKill, CC);
2274  unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
2275  LHSReg, LHSIsKill, CmpReg, true);
2276  ResultReg = createResultReg(RC);
2277  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2278  TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2279  } else {
2280  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2281  unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
2282  CmpRHSReg, CmpRHSIsKill, CC);
2283  unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
2284  LHSReg, LHSIsKill);
2285  unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
2286  RHSReg, RHSIsKill);
2287  unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
2288  AndReg, /*IsKill=*/true);
2289  ResultReg = createResultReg(RC);
2290  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2291  TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2292  }
2293  updateValueMap(I, ResultReg);
2294  return true;
2295 }
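// --- Illustrative sketch (editor's addition, not part of X86FastISel.cpp) ---
// Without AVX, the select is simulated with a compare that produces an
// all-ones/all-zeros mask, followed by AND/ANDN/OR. A scalar model of that
// bit-twiddling, using memcpy for the float<->bit reinterpretation:
#include <cstdint>
#include <cstring>

float sse_select(bool CmpResult, float LHS, float RHS) {
  uint32_t Mask = CmpResult ? 0xFFFFFFFFu : 0u; // CMPSSrr writes the mask
  uint32_t L, R;
  std::memcpy(&L, &LHS, sizeof(L));
  std::memcpy(&R, &RHS, sizeof(R));
  uint32_t Bits = (Mask & L) | (~Mask & R);     // ANDPSrr, ANDNPSrr, ORPSrr
  float Out;
  std::memcpy(&Out, &Bits, sizeof(Out));
  return Out;
}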
2296 
2297 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
2298  // These are pseudo CMOV instructions and will be later expanded into control-
2299  // flow.
2300  unsigned Opc;
2301  switch (RetVT.SimpleTy) {
2302  default: return false;
2303  case MVT::i8: Opc = X86::CMOV_GR8; break;
2304  case MVT::i16: Opc = X86::CMOV_GR16; break;
2305  case MVT::i32: Opc = X86::CMOV_GR32; break;
2306  case MVT::f32: Opc = X86::CMOV_FR32; break;
2307  case MVT::f64: Opc = X86::CMOV_FR64; break;
2308  }
2309 
2310  const Value *Cond = I->getOperand(0);
2311  X86::CondCode CC = X86::COND_NE;
2312 
2313  // Optimize conditions coming from a compare if both instructions are in the
2314  // same basic block (values defined in other basic blocks may not have
2315  // initialized registers).
2316  const auto *CI = dyn_cast<CmpInst>(Cond);
2317  if (CI && (CI->getParent() == I->getParent())) {
2318  bool NeedSwap;
2319  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
2320  if (CC > X86::LAST_VALID_COND)
2321  return false;
2322 
2323  const Value *CmpLHS = CI->getOperand(0);
2324  const Value *CmpRHS = CI->getOperand(1);
2325 
2326  if (NeedSwap)
2327  std::swap(CmpLHS, CmpRHS);
2328 
2329  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2330  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2331  return false;
2332  } else {
2333  unsigned CondReg = getRegForValue(Cond);
2334  if (CondReg == 0)
2335  return false;
2336  bool CondIsKill = hasTrivialKill(Cond);
2337 
2338  // In case OpReg is a K register, COPY to a GPR
2339  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2340  unsigned KCondReg = CondReg;
2341  CondReg = createResultReg(&X86::GR32RegClass);
2342  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2343  TII.get(TargetOpcode::COPY), CondReg)
2344  .addReg(KCondReg, getKillRegState(CondIsKill));
2345  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2346  X86::sub_8bit);
2347  }
2348  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2349  .addReg(CondReg, getKillRegState(CondIsKill))
2350  .addImm(1);
2351  }
2352 
2353  const Value *LHS = I->getOperand(1);
2354  const Value *RHS = I->getOperand(2);
2355 
2356  unsigned LHSReg = getRegForValue(LHS);
2357  bool LHSIsKill = hasTrivialKill(LHS);
2358 
2359  unsigned RHSReg = getRegForValue(RHS);
2360  bool RHSIsKill = hasTrivialKill(RHS);
2361 
2362  if (!LHSReg || !RHSReg)
2363  return false;
2364 
2365  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2366 
2367  unsigned ResultReg =
2368  fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
2369  updateValueMap(I, ResultReg);
2370  return true;
2371 }
2372 
2373 bool X86FastISel::X86SelectSelect(const Instruction *I) {
2374  MVT RetVT;
2375  if (!isTypeLegal(I->getType(), RetVT))
2376  return false;
2377 
2378  // Check if we can fold the select.
2379  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2380  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2381  const Value *Opnd = nullptr;
2382  switch (Predicate) {
2383  default: break;
2384  case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2385  case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
2386  }
2387  // No need for a select anymore - this is an unconditional move.
2388  if (Opnd) {
2389  unsigned OpReg = getRegForValue(Opnd);
2390  if (OpReg == 0)
2391  return false;
2392  bool OpIsKill = hasTrivialKill(Opnd);
2393  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2394  unsigned ResultReg = createResultReg(RC);
2395  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2396  TII.get(TargetOpcode::COPY), ResultReg)
2397  .addReg(OpReg, getKillRegState(OpIsKill));
2398  updateValueMap(I, ResultReg);
2399  return true;
2400  }
2401  }
2402 
2403  // First try to use real conditional move instructions.
2404  if (X86FastEmitCMoveSelect(RetVT, I))
2405  return true;
2406 
2407  // Try to use a sequence of SSE instructions to simulate a conditional move.
2408  if (X86FastEmitSSESelect(RetVT, I))
2409  return true;
2410 
2411  // Fall-back to pseudo conditional move instructions, which will be later
2412  // converted to control-flow.
2413  if (X86FastEmitPseudoSelect(RetVT, I))
2414  return true;
2415 
2416  return false;
2417 }
2418 
2419 // Common code for X86SelectSIToFP and X86SelectUIToFP.
2420 bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
2421  // The target-independent selection algorithm in FastISel already knows how
2422  // to select a SINT_TO_FP if the target is SSE but not AVX.
2423  // Early exit if the subtarget doesn't have AVX.
2424  // Unsigned conversion requires avx512.
2425  bool HasAVX512 = Subtarget->hasAVX512();
2426  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
2427  return false;
2428 
2429  // TODO: We could sign extend narrower types.
2430  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
2431  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
2432  return false;
2433 
2434  // Select integer to float/double conversion.
2435  unsigned OpReg = getRegForValue(I->getOperand(0));
2436  if (OpReg == 0)
2437  return false;
2438 
2439  unsigned Opcode;
2440 
2441  static const uint16_t SCvtOpc[2][2][2] = {
2442  { { X86::VCVTSI2SSrr, X86::VCVTSI642SSrr },
2443  { X86::VCVTSI2SDrr, X86::VCVTSI642SDrr } },
2444  { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
2445  { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
2446  };
2447  static const uint16_t UCvtOpc[2][2] = {
2448  { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
2449  { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
2450  };
2451  bool Is64Bit = SrcVT == MVT::i64;
2452 
2453  if (I->getType()->isDoubleTy()) {
2454  // s/uitofp int -> double
2455  Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
2456  } else if (I->getType()->isFloatTy()) {
2457  // s/uitofp int -> float
2458  Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
2459  } else
2460  return false;
2461 
2462  MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
2463  const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
2464  unsigned ImplicitDefReg = createResultReg(RC);
2465  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2466  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2467  unsigned ResultReg =
2468  fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
2469  updateValueMap(I, ResultReg);
2470  return true;
2471 }
2472 
2473 bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2474  return X86SelectIntToFP(I, /*IsSigned*/true);
2475 }
2476 
2477 bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
2478  return X86SelectIntToFP(I, /*IsSigned*/false);
2479 }
2480 
2481 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
2482 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2483  unsigned TargetOpc,
2484  const TargetRegisterClass *RC) {
2485  assert((I->getOpcode() == Instruction::FPExt ||
2486  I->getOpcode() == Instruction::FPTrunc) &&
2487  "Instruction must be an FPExt or FPTrunc!");
2488 
2489  unsigned OpReg = getRegForValue(I->getOperand(0));
2490  if (OpReg == 0)
2491  return false;
2492 
2493  unsigned ImplicitDefReg;
2494  if (Subtarget->hasAVX()) {
2495  ImplicitDefReg = createResultReg(RC);
2496  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2497  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2498 
2499  }
2500 
2501  unsigned ResultReg = createResultReg(RC);
2502  MachineInstrBuilder MIB;
2503  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
2504  ResultReg);
2505 
2506  if (Subtarget->hasAVX())
2507  MIB.addReg(ImplicitDefReg);
2508 
2509  MIB.addReg(OpReg);
2510  updateValueMap(I, ResultReg);
2511  return true;
2512 }
2513 
2514 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2515  if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
2516  I->getOperand(0)->getType()->isFloatTy()) {
2517  bool HasAVX512 = Subtarget->hasAVX512();
2518  // fpext from float to double.
2519  unsigned Opc =
2520  HasAVX512 ? X86::VCVTSS2SDZrr
2521  : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2522  return X86SelectFPExtOrFPTrunc(
2523  I, Opc, HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass);
2524  }
2525 
2526  return false;
2527 }
2528 
2529 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2530  if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
2531  I->getOperand(0)->getType()->isDoubleTy()) {
2532  bool HasAVX512 = Subtarget->hasAVX512();
2533  // fptrunc from double to float.
2534  unsigned Opc =
2535  HasAVX512 ? X86::VCVTSD2SSZrr
2536  : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2537  return X86SelectFPExtOrFPTrunc(
2538  I, Opc, HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass);
2539  }
2540 
2541  return false;
2542 }
2543 
2544 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2545  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
2546  EVT DstVT = TLI.getValueType(DL, I->getType());
2547 
2548  // This code only handles truncation to byte.
2549  if (DstVT != MVT::i8 && DstVT != MVT::i1)
2550  return false;
2551  if (!TLI.isTypeLegal(SrcVT))
2552  return false;
2553 
2554  unsigned InputReg = getRegForValue(I->getOperand(0));
2555  if (!InputReg)
2556  // Unhandled operand. Halt "fast" selection and bail.
2557  return false;
2558 
2559  if (SrcVT == MVT::i8) {
2560  // Truncate from i8 to i1; no code needed.
2561  updateValueMap(I, InputReg);
2562  return true;
2563  }
2564 
2565  // Issue an extract_subreg.
2566  unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
2567  InputReg, false,
2568  X86::sub_8bit);
2569  if (!ResultReg)
2570  return false;
2571 
2572  updateValueMap(I, ResultReg);
2573  return true;
2574 }
2575 
2576 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2577  return Len <= (Subtarget->is64Bit() ? 32 : 16);
2578 }
2579 
2580 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2581  X86AddressMode SrcAM, uint64_t Len) {
2582 
2583  // Make sure we don't bloat code by inlining very large memcpy's.
2584  if (!IsMemcpySmall(Len))
2585  return false;
2586 
2587  bool i64Legal = Subtarget->is64Bit();
2588 
2589  // We don't care about alignment here since we just emit integer accesses.
2590  while (Len) {
2591  MVT VT;
2592  if (Len >= 8 && i64Legal)
2593  VT = MVT::i64;
2594  else if (Len >= 4)
2595  VT = MVT::i32;
2596  else if (Len >= 2)
2597  VT = MVT::i16;
2598  else
2599  VT = MVT::i8;
2600 
2601  unsigned Reg;
2602  bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2603  RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
2604  assert(RV && "Failed to emit load or store??");
2605 
2606  unsigned Size = VT.getSizeInBits()/8;
2607  Len -= Size;
2608  DestAM.Disp += Size;
2609  SrcAM.Disp += Size;
2610  }
2611 
2612  return true;
2613 }
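// --- Illustrative sketch (editor's addition, not part of X86FastISel.cpp) ---
// TryEmitSmallMemcpy walks the length greedily, always emitting the widest
// integer load/store that still fits (8, 4, 2, then 1 bytes). The same
// chunking strategy in plain C++, with std::memcpy standing in for the
// emitted load/store pair:
#include <cstdint>
#include <cstring>

void smallMemcpy(char *Dst, const char *Src, uint64_t Len, bool Is64Bit) {
  while (Len) {
    uint64_t Size = (Len >= 8 && Is64Bit) ? 8 : (Len >= 4) ? 4 : (Len >= 2) ? 2 : 1;
    std::memcpy(Dst, Src, Size); // one X86FastEmitLoad + X86FastEmitStore of 'Size' bytes
    Dst += Size;
    Src += Size;
    Len -= Size;
  }
}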
2614 
2615 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2616  // FIXME: Handle more intrinsics.
2617  switch (II->getIntrinsicID()) {
2618  default: return false;
2619  case Intrinsic::convert_from_fp16:
2620  case Intrinsic::convert_to_fp16: {
2621  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
2622  return false;
2623 
2624  const Value *Op = II->getArgOperand(0);
2625  unsigned InputReg = getRegForValue(Op);
2626  if (InputReg == 0)
2627  return false;
2628 
2629  // F16C only allows converting from float to half and from half to float.
2630  bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2631  if (IsFloatToHalf) {
2632  if (!Op->getType()->isFloatTy())
2633  return false;
2634  } else {
2635  if (!II->getType()->isFloatTy())
2636  return false;
2637  }
2638 
2639  unsigned ResultReg = 0;
2640  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
2641  if (IsFloatToHalf) {
2642  // 'InputReg' is implicitly promoted from register class FR32 to
2643  // register class VR128 by method 'constrainOperandRegClass' which is
2644  // directly called by 'fastEmitInst_ri'.
2645  // Instruction VCVTPS2PHrr takes an extra immediate operand which is
2646  // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
2647  // It's consistent with the other FP instructions, which are usually
2648  // controlled by MXCSR.
2649  InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4);
2650 
2651  // Move the lower 32-bits of InputReg to another register of class GR32.
2652  ResultReg = createResultReg(&X86::GR32RegClass);
2653  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2654  TII.get(X86::VMOVPDI2DIrr), ResultReg)
2655  .addReg(InputReg, RegState::Kill);
2656 
2657  // The result value is in the lower 16-bits of ResultReg.
2658  unsigned RegIdx = X86::sub_16bit;
2659  ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
2660  } else {
2661  assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2662  // Explicitly sign-extend the input to 32-bit.
2663  InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
2664  /*Kill=*/false);
2665 
2666  // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
2667  InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2668  InputReg, /*Kill=*/true);
2669 
2670  InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
2671 
2672  // The result value is in the lower 32-bits of ResultReg.
2673  // Emit an explicit copy from register class VR128 to register class FR32.
2674  ResultReg = createResultReg(&X86::FR32RegClass);
2675  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2676  TII.get(TargetOpcode::COPY), ResultReg)
2677  .addReg(InputReg, RegState::Kill);
2678  }
2679 
2680  updateValueMap(II, ResultReg);
2681  return true;
2682  }
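// --- Illustrative sketch (editor's addition, not part of X86FastISel.cpp) ---
// The same float<->half conversions written against the F16C compiler
// intrinsics (assumes a compiler providing <immintrin.h> and -mf16c; these
// wrap the VCVTPS2PH / VCVTPH2PS instructions selected above). The immediate
// _MM_FROUND_CUR_DIRECTION (4) requests rounding per MXCSR.RC, matching the
// rounding-control operand used in the case above.
#include <immintrin.h>

unsigned short floatToHalf(float X) {
  return _cvtss_sh(X, _MM_FROUND_CUR_DIRECTION); // VCVTPS2PH, then take the low 16 bits
}

float halfToFloat(unsigned short H) {
  return _cvtsh_ss(H);                           // VCVTPH2PS, then take the low float
}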
2683  case Intrinsic::frameaddress: {
2684  MachineFunction *MF = FuncInfo.MF;
2685  if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
2686  return false;
2687 
2688  Type *RetTy = II->getCalledFunction()->getReturnType();
2689 
2690  MVT VT;
2691  if (!isTypeLegal(RetTy, VT))
2692  return false;
2693 
2694  unsigned Opc;
2695  const TargetRegisterClass *RC = nullptr;
2696 
2697  switch (VT.SimpleTy) {
2698  default: llvm_unreachable("Invalid result type for frameaddress.");
2699  case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2700  case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2701  }
2702 
2703  // This needs to be set before we call getPtrSizedFrameRegister, otherwise
2704  // we get the wrong frame register.
2705  MachineFrameInfo &MFI = MF->getFrameInfo();
2706  MFI.setFrameAddressIsTaken(true);
2707 
2708  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2709  unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2710  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2711  (FrameReg == X86::EBP && VT == MVT::i32)) &&
2712  "Invalid Frame Register!");
2713 
2714  // Always make a copy of the frame register to a vreg first, so that we
2715  // never directly reference the frame register (the TwoAddressInstruction-
2716  // Pass doesn't like that).
2717  unsigned SrcReg = createResultReg(RC);
2718  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2719  TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2720 
2721  // Now recursively load from the frame address.
2722  // movq (%rbp), %rax
2723  // movq (%rax), %rax
2724  // movq (%rax), %rax
2725  // ...
2726  unsigned DestReg;
2727  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2728  while (Depth--) {
2729  DestReg = createResultReg(RC);
2730  addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2731  TII.get(Opc), DestReg), SrcReg);
2732  SrcReg = DestReg;
2733  }
2734 
2735  updateValueMap(II, SrcReg);
2736  return true;
2737  }
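// --- Illustrative sketch (editor's addition, not part of X86FastISel.cpp) ---
// The depth loop above repeatedly loads through the saved frame pointer,
// i.e. "movq (%rbp), %rax; movq (%rax), %rax; ...". A C++ model of that walk
// using the GCC/Clang builtin, assuming the target maintains a frame-pointer
// chain (as the lowering above requires):
#include <cstdint>

void *frameAddress(unsigned Depth) {
  void **FP = static_cast<void **>(__builtin_frame_address(0)); // copy of RBP/EBP
  while (Depth--)
    FP = static_cast<void **>(*FP); // one MOV64rm/MOV32rm per level of depth
  return FP;
}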
2738  case Intrinsic::memcpy: {
2739  const MemCpyInst *MCI = cast<MemCpyInst>(II);
2740  // Don't handle volatile or variable length memcpys.
2741  if (MCI->isVolatile())
2742  return false;
2743 
2744  if (isa<ConstantInt>(MCI->getLength())) {
2745  // Small memcpy's are common enough that we want to do them
2746  // without a call if possible.
2747  uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
2748  if (IsMemcpySmall(Len)) {
2749  X86AddressMode DestAM, SrcAM;
2750  if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
2751  !X86SelectAddress(MCI->getRawSource(), SrcAM))
2752  return false;
2753  TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2754  return true;
2755  }
2756  }
2757 
2758  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2759  if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
2760  return false;
2761 
2762  if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
2763  return false;
2764 
2765  return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
2766  }
2767  case Intrinsic::memset: {
2768  const MemSetInst *MSI = cast<MemSetInst>(II);
2769 
2770  if (MSI->isVolatile())
2771  return false;
2772 
2773  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2774  if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
2775  return false;
2776 
2777  if (MSI->getDestAddressSpace() > 255)
2778  return false;
2779 
2780  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
2781  }
2782  case Intrinsic::stackprotector: {
2783  // Emit code to store the stack guard onto the stack.
2784  EVT PtrTy = TLI.getPointerTy(DL);
2785 
2786  const Value *Op1 = II->getArgOperand(0); // The guard's value.
2787  const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
2788 
2789  MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2790 
2791  // Grab the frame index.
2792  X86AddressMode AM;
2793  if (!X86SelectAddress(Slot, AM)) return false;
2794  if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2795  return true;
2796  }
2797  case Intrinsic::dbg_declare: {
2798  const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
2799  X86AddressMode AM;
2800  assert(DI->getAddress() && "Null address should be checked earlier!");
2801  if (!X86SelectAddress(DI->getAddress(), AM))
2802  return false;
2803  const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2804  // FIXME may need to add RegState::Debug to any registers produced,
2805  // although ESP/EBP should be the only ones at the moment.
2806  assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
2807  "Expected inlined-at fields to agree");
2808  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
2809  .addImm(0)
2810  .addMetadata(DI->getVariable())
2811  .addMetadata(DI->getExpression());
2812  return true;
2813  }
2814  case Intrinsic::trap: {
2815  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
2816  return true;
2817  }
2818  case Intrinsic::sqrt: {
2819  if (!Subtarget->hasSSE1())
2820  return false;
2821 
2822  Type *RetTy = II->getCalledFunction()->getReturnType();
2823 
2824  MVT VT;
2825  if (!isTypeLegal(RetTy, VT))
2826  return false;
2827 
2828  // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
2829  // is not generated by FastISel yet.
2830  // FIXME: Update this code once tablegen can handle it.
2831  static const uint16_t SqrtOpc[3][2] = {
2832  { X86::SQRTSSr, X86::SQRTSDr },
2833  { X86::VSQRTSSr, X86::VSQRTSDr },
2834  { X86::VSQRTSSZr, X86::VSQRTSDZr },
2835  };
2836  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
2837  Subtarget->hasAVX() ? 1 :
2838  0;
2839  unsigned Opc;
2840  switch (VT.SimpleTy) {
2841  default: return false;
2842  case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
2843  case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
2844  }
2845 
2846  const Value *SrcVal = II->getArgOperand(0);
2847  unsigned SrcReg = getRegForValue(SrcVal);
2848 
2849  if (SrcReg == 0)
2850  return false;
2851 
2852  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
2853  unsigned ImplicitDefReg = 0;
2854  if (AVXLevel > 0) {
2855  ImplicitDefReg = createResultReg(RC);
2856  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2857  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2858  }
2859 
2860  unsigned ResultReg = createResultReg(RC);
2861  MachineInstrBuilder MIB;
2862  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2863  ResultReg);
2864 
2865  if (ImplicitDefReg)
2866  MIB.addReg(ImplicitDefReg);
2867 
2868  MIB.addReg(SrcReg);
2869 
2870  updateValueMap(II, ResultReg);
2871  return true;
2872  }
2873  case Intrinsic::sadd_with_overflow:
2874  case Intrinsic::uadd_with_overflow:
2875  case Intrinsic::ssub_with_overflow:
2876  case Intrinsic::usub_with_overflow:
2877  case Intrinsic::smul_with_overflow:
2878  case Intrinsic::umul_with_overflow: {
2879  // This implements the basic lowering of the xalu with overflow intrinsics
2880  // into add/sub/mul followed by either seto or setb.
2881  const Function *Callee = II->getCalledFunction();
2882  auto *Ty = cast<StructType>(Callee->getReturnType());
2883  Type *RetTy = Ty->getTypeAtIndex(0U);
2884  assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
2885  Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
2886  "Overflow value expected to be an i1");
2887 
2888  MVT VT;
2889  if (!isTypeLegal(RetTy, VT))
2890  return false;
2891 
2892  if (VT < MVT::i8 || VT > MVT::i64)
2893  return false;
2894 
2895  const Value *LHS = II->getArgOperand(0);
2896  const Value *RHS = II->getArgOperand(1);
2897 
2898  // Canonicalize immediate to the RHS.
2899  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2900  isCommutativeIntrinsic(II))
2901  std::swap(LHS, RHS);
2902 
2903  unsigned BaseOpc, CondOpc;
2904  switch (II->getIntrinsicID()) {
2905  default: llvm_unreachable("Unexpected intrinsic!");
2906  case Intrinsic::sadd_with_overflow:
2907  BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
2908  case Intrinsic::uadd_with_overflow:
2909  BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
2910  case Intrinsic::ssub_with_overflow:
2911  BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
2912  case Intrinsic::usub_with_overflow:
2913  BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
2914  case Intrinsic::smul_with_overflow:
2915  BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
2916  case Intrinsic::umul_with_overflow:
2917  BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
2918  }
2919 
2920  unsigned LHSReg = getRegForValue(LHS);
2921  if (LHSReg == 0)
2922  return false;
2923  bool LHSIsKill = hasTrivialKill(LHS);
2924 
2925  unsigned ResultReg = 0;
2926  // Check if we have an immediate version.
2927  if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2928  static const uint16_t Opc[2][4] = {
2929  { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2930  { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2931  };
2932 
2933  if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
2934  CondOpc == X86::SETOr) {
2935  // We can use INC/DEC.
2936  ResultReg = createResultReg(TLI.getRegClassFor(VT));
2937  bool IsDec = BaseOpc == ISD::SUB;
2938  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2939  TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2940  .addReg(LHSReg, getKillRegState(LHSIsKill));
2941  } else
2942  ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
2943  CI->getZExtValue());
2944  }
2945 
2946  unsigned RHSReg;
2947  bool RHSIsKill;
2948  if (!ResultReg) {
2949  RHSReg = getRegForValue(RHS);
2950  if (RHSReg == 0)
2951  return false;
2952  RHSIsKill = hasTrivialKill(RHS);
2953  ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
2954  RHSIsKill);
2955  }
2956 
2957  // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2958  // it manually.
2959  if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2960  static const uint16_t MULOpc[] =
2961  { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2962  static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
2963  // First copy the first operand into RAX, which is an implicit input to
2964  // the X86::MUL*r instruction.
2965  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2966  TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2967  .addReg(LHSReg, getKillRegState(LHSIsKill));
2968  ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2969  TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
2970  } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2971  static const uint16_t MULOpc[] =
2972  { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2973  if (VT == MVT::i8) {
2974  // Copy the first operand into AL, which is an implicit input to the
2975  // X86::IMUL8r instruction.
2976  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2977  TII.get(TargetOpcode::COPY), X86::AL)
2978  .addReg(LHSReg, getKillRegState(LHSIsKill));
2979  ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
2980  RHSIsKill);
2981  } else
2982  ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2983  TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
2984  RHSReg, RHSIsKill);
2985  }
2986 
2987  if (!ResultReg)
2988  return false;
2989 
2990  // Assign to a GPR since the overflow return value is lowered to a SETcc.
2991  unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
2992  assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2993  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
2994  ResultReg2);
2995 
2996  updateValueMap(II, ResultReg, 2);
2997  return true;
2998  }
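// --- Illustrative sketch (editor's addition, not part of X86FastISel.cpp) ---
// The *.with.overflow intrinsics become a plain ADD/SUB/MUL followed by a
// SETO (signed overflow) or SETB (unsigned carry). Clang/GCC expose the same
// lowering through the checked-arithmetic builtins, for example:
#include <cstdint>

bool uaddWithOverflow(uint32_t A, uint32_t B, uint32_t *Sum) {
  return __builtin_add_overflow(A, B, Sum); // ADD32rr + SETBr on the carry flag
}

bool saddWithOverflow(int32_t A, int32_t B, int32_t *Sum) {
  return __builtin_add_overflow(A, B, Sum); // ADD32rr + SETOr on the overflow flag
}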
2999  case Intrinsic::x86_sse_cvttss2si:
3000  case Intrinsic::x86_sse_cvttss2si64:
3001  case Intrinsic::x86_sse2_cvttsd2si:
3002  case Intrinsic::x86_sse2_cvttsd2si64: {
3003  bool IsInputDouble;
3004  switch (II->getIntrinsicID()) {
3005  default: llvm_unreachable("Unexpected intrinsic.");
3006  case Intrinsic::x86_sse_cvttss2si:
3007  case Intrinsic::x86_sse_cvttss2si64:
3008  if (!Subtarget->hasSSE1())
3009  return false;
3010  IsInputDouble = false;
3011  break;
3012  case Intrinsic::x86_sse2_cvttsd2si:
3013  case Intrinsic::x86_sse2_cvttsd2si64:
3014  if (!Subtarget->hasSSE2())
3015  return false;
3016  IsInputDouble = true;
3017  break;
3018  }
3019 
3020  Type *RetTy = II->getCalledFunction()->getReturnType();
3021  MVT VT;
3022  if (!isTypeLegal(RetTy, VT))
3023  return false;
3024 
3025  static const uint16_t CvtOpc[3][2][2] = {
3026  { { X86::CVTTSS2SIrr, X86::CVTTSS2SI64rr },
3027  { X86::CVTTSD2SIrr, X86::CVTTSD2SI64rr } },
3028  { { X86::VCVTTSS2SIrr, X86::VCVTTSS2SI64rr },
3029  { X86::VCVTTSD2SIrr, X86::VCVTTSD2SI64rr } },
3030  { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
3031  { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
3032  };
3033  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
3034  Subtarget->hasAVX() ? 1 :
3035  0;
3036  unsigned Opc;
3037  switch (VT.SimpleTy) {
3038  default: llvm_unreachable("Unexpected result type.");
3039  case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
3040  case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
3041  }
3042 
3043  // Check if we can fold insertelement instructions into the convert.
3044  const Value *Op = II->getArgOperand(0);
3045  while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
3046  const Value *Index = IE->getOperand(2);
3047  if (!isa<ConstantInt>(Index))
3048  break;
3049  unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
3050 
3051  if (Idx == 0) {
3052  Op = IE->getOperand(1);
3053  break;
3054  }
3055  Op = IE->getOperand(0);
3056  }
3057 
3058  unsigned Reg = getRegForValue(Op);
3059  if (Reg == 0)
3060  return false;
3061 
3062  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3063  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3064  .addReg(Reg);
3065 
3066  updateValueMap(II, ResultReg);
3067  return true;
3068  }
3069  }
3070 }
3071 
3072 bool X86FastISel::fastLowerArguments() {
3073  if (!FuncInfo.CanLowerReturn)
3074  return false;
3075 
3076  const Function *F = FuncInfo.Fn;
3077  if (F->isVarArg())
3078  return false;
3079 
3080  CallingConv::ID CC = F->getCallingConv();
3081  if (CC != CallingConv::C)
3082  return false;
3083 
3084  if (Subtarget->isCallingConvWin64(CC))
3085  return false;
3086 
3087  if (!Subtarget->is64Bit())
3088  return false;
3089 
3090  if (Subtarget->useSoftFloat())
3091  return false;
3092 
3093  // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments.
3094  unsigned GPRCnt = 0;
3095  unsigned FPRCnt = 0;
3096  for (auto const &Arg : F->args()) {
3097  if (Arg.hasAttribute(Attribute::ByVal) ||
3098  Arg.hasAttribute(Attribute::InReg) ||
3099  Arg.hasAttribute(Attribute::StructRet) ||
3100  Arg.hasAttribute(Attribute::SwiftSelf) ||
3101  Arg.hasAttribute(Attribute::SwiftError) ||
3102  Arg.hasAttribute(Attribute::Nest))
3103  return false;
3104 
3105  Type *ArgTy = Arg.getType();
3106  if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3107  return false;
3108 
3109  EVT ArgVT = TLI.getValueType(DL, ArgTy);
3110  if (!ArgVT.isSimple()) return false;
3111  switch (ArgVT.getSimpleVT().SimpleTy) {
3112  default: return false;
3113  case MVT::i32:
3114  case MVT::i64:
3115  ++GPRCnt;
3116  break;
3117  case MVT::f32:
3118  case MVT::f64:
3119  if (!Subtarget->hasSSE1())
3120  return false;
3121  ++FPRCnt;
3122  break;
3123  }
3124 
3125  if (GPRCnt > 6)
3126  return false;
3127 
3128  if (FPRCnt > 8)
3129  return false;
3130  }
3131 
3132  static const MCPhysReg GPR32ArgRegs[] = {
3133  X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
3134  };
3135  static const MCPhysReg GPR64ArgRegs[] = {
3136  X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
3137  };
3138  static const MCPhysReg XMMArgRegs[] = {
3139  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3140  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3141  };
3142 
3143  unsigned GPRIdx = 0;
3144  unsigned FPRIdx = 0;
3145  for (auto const &Arg : F->args()) {
3146  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3147  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
3148  unsigned SrcReg;
3149  switch (VT.SimpleTy) {
3150  default: llvm_unreachable("Unexpected value type.");
3151  case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
3152  case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
3153  case MVT::f32: LLVM_FALLTHROUGH;
3154  case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
3155  }
3156  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3157  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3158  // Without this, EmitLiveInCopies may eliminate the livein if its only
3159  // use is a bitcast (which isn't turned into an instruction).
3160  unsigned ResultReg = createResultReg(RC);
3161  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3162  TII.get(TargetOpcode::COPY), ResultReg)
3163  .addReg(DstReg, getKillRegState(true));
3164  updateValueMap(&Arg, ResultReg);
3165  }
3166  return true;
3167 }
3168 
3169 static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
3170  CallingConv::ID CC,
3171  ImmutableCallSite *CS) {
3172  if (Subtarget->is64Bit())
3173  return 0;
3174  if (Subtarget->getTargetTriple().isOSMSVCRT())
3175  return 0;
3176  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3177  CC == CallingConv::HiPE)
3178  return 0;
3179 
3180  if (CS)
3181  if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
3182  CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
3183  return 0;
3184 
3185  return 4;
3186 }
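// Illustrative note, not from the LLVM source: on 32-bit targets that are not
// MSVCRT or MCU environments, a callee returning a struct through the hidden
// sret pointer pops that pointer itself (ret $4), so this helper reports 4
// bytes for the CALLSEQ_END emitted in fastLowerCall below. For example, when
// targeting i386 ELF:
//
//   struct Big { int v[4]; };
//   struct Big make(void);   // callee removes the 4-byte sret pointer itself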
3187 
3188 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3189  auto &OutVals = CLI.OutVals;
3190  auto &OutFlags = CLI.OutFlags;
3191  auto &OutRegs = CLI.OutRegs;
3192  auto &Ins = CLI.Ins;
3193  auto &InRegs = CLI.InRegs;
3194  CallingConv::ID CC = CLI.CallConv;
3195  bool &IsTailCall = CLI.IsTailCall;
3196  bool IsVarArg = CLI.IsVarArg;
3197  const Value *Callee = CLI.Callee;
3198  MCSymbol *Symbol = CLI.Symbol;
3199 
3200  bool Is64Bit = Subtarget->is64Bit();
3201  bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3202 
3203  const CallInst *CI =
3204  CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
3205  const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr;
3206 
3207  // Call / invoke instructions with NoCfCheck attribute require special
3208  // handling.
3209  const auto *II =
3210  CLI.CS ? dyn_cast<InvokeInst>(CLI.CS->getInstruction()) : nullptr;
3211  if ((CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck()))
3212  return false;
3213 
3214  // Functions with no_caller_saved_registers need special handling.
3215  if ((CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3216  (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
3217  return false;
3218 
3219  // Functions using retpoline for indirect calls need to use SDISel.
3220  if (Subtarget->useRetpolineIndirectCalls())
3221  return false;
3222 
3223  // Handle only C, fastcc, and webkit_js calling conventions for now.
3224  switch (CC) {
3225  default: return false;
3226  case CallingConv::C:
3227  case CallingConv::Fast:
3228  case CallingConv::WebKit_JS:
3229  case CallingConv::Swift:
3230  case CallingConv::X86_FastCall:
3231  case CallingConv::X86_StdCall:
3232  case CallingConv::X86_ThisCall:
3233  case CallingConv::Win64:
3234  case CallingConv::X86_64_SysV:
3235  break;
3236  }
3237 
3238  // Allow SelectionDAG isel to handle tail calls.
3239  if (IsTailCall)
3240  return false;
3241 
3242  // fastcc with -tailcallopt is intended to provide a guaranteed
3243  // tail call optimization. FastISel doesn't know how to do that.
3244  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
3245  return false;
3246 
3247  // Don't know how to handle Win64 varargs yet. Nothing special needed for
3248  // x86-32. Special handling for x86-64 is implemented.
3249  if (IsVarArg && IsWin64)
3250  return false;
3251 
3252  // Don't know about inalloca yet.
3253  if (CLI.CS && CLI.CS->hasInAllocaArgument())
3254  return false;
3255 
3256  for (auto Flag : CLI.OutFlags)
3257  if (Flag.isSwiftError())
3258  return false;
3259 
3260  SmallVector<MVT, 16> OutVTs;
3261  SmallVector<unsigned, 16> ArgRegs;
3262 
3263  // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
3264  // instruction. This is safe because it is common to all FastISel supported
3265  // calling conventions on x86.
3266  for (int i = 0, e = OutVals.size(); i != e; ++i) {
3267  Value *&Val = OutVals[i];
3268  ISD::ArgFlagsTy Flags = OutFlags[i];
3269  if (auto *CI = dyn_cast<ConstantInt>(Val)) {
3270  if (CI->getBitWidth() < 32) {
3271  if (Flags.isSExt())
3272  Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
3273  else
3274  Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
3275  }
3276  }
3277 
3278  // Passing bools around ends up doing a trunc to i1 and passing it.
3279  // Codegen this as an argument + "and 1".
3280  MVT VT;
3281  auto *TI = dyn_cast<TruncInst>(Val);
3282  unsigned ResultReg;
3283  if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
3284  (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
3285  TI->hasOneUse()) {
3286  Value *PrevVal = TI->getOperand(0);
3287  ResultReg = getRegForValue(PrevVal);
3288 
3289  if (!ResultReg)
3290  return false;
3291 
3292  if (!isTypeLegal(PrevVal->getType(), VT))
3293  return false;
3294 
3295  ResultReg =
3296  fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
3297  } else {
3298  if (!isTypeLegal(Val->getType(), VT))
3299  return false;
3300  ResultReg = getRegForValue(Val);
3301  }
3302 
3303  if (!ResultReg)
3304  return false;
3305 
3306  ArgRegs.push_back(ResultReg);
3307  OutVTs.push_back(VT);
3308  }
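// Illustrative note, not from the LLVM source: the TruncInst special case
// above matches IR such as
//
//   %b = trunc i32 %x to i1
//   call void @use(i1 zeroext %b)
//
// and reuses the register already holding %x, emitting a single
// "and reg, 1" via fastEmit_ri(ISD::AND) instead of materializing a
// separate i1 value.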
3309 
3310  // Analyze operands of the call, assigning locations to each operand.
3311  SmallVector<CCValAssign, 16> ArgLocs;
3312  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
3313 
3314  // Allocate shadow area for Win64
3315  if (IsWin64)
3316  CCInfo.AllocateStack(32, 8);
3317 
3318  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
3319 
3320  // Get a count of how many bytes are to be pushed on the stack.
3321  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3322 
3323  // Issue CALLSEQ_START
3324  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3325  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3326  .addImm(NumBytes).addImm(0).addImm(0);
3327 
3328  // Walk the register/memloc assignments, inserting copies/loads.
3329  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3330  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3331  CCValAssign const &VA = ArgLocs[i];
3332  const Value *ArgVal = OutVals[VA.getValNo()];
3333  MVT ArgVT = OutVTs[VA.getValNo()];
3334 
3335  if (ArgVT == MVT::x86mmx)
3336  return false;
3337 
3338  unsigned ArgReg = ArgRegs[VA.getValNo()];
3339 
3340  // Promote the value if needed.
3341  switch (VA.getLocInfo()) {
3342  case CCValAssign::Full: break;
3343  case CCValAssign::SExt: {
3344  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3345  "Unexpected extend");
3346 
3347  if (ArgVT == MVT::i1)
3348  return false;
3349 
3350  bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3351  ArgVT, ArgReg);
3352  assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3353  ArgVT = VA.getLocVT();
3354  break;
3355  }
3356  case CCValAssign::ZExt: {
3357  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3358  "Unexpected extend");
3359 
3360  // Handle zero-extension from i1 to i8, which is common.
3361  if (ArgVT == MVT::i1) {
3362  // Set the high bits to zero.
3363  ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
3364  ArgVT = MVT::i8;
3365 
3366  if (ArgReg == 0)
3367  return false;
3368  }
3369 
3370  bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3371  ArgVT, ArgReg);
3372  assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
3373  ArgVT = VA.getLocVT();
3374  break;
3375  }
3376  case CCValAssign::AExt: {
3377  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3378  "Unexpected extend");
3379  bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
3380  ArgVT, ArgReg);
3381  if (!Emitted)
3382  Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3383  ArgVT, ArgReg);
3384  if (!Emitted)
3385  Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3386  ArgVT, ArgReg);
3387 
3388  assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
3389  ArgVT = VA.getLocVT();
3390  break;
3391  }
3392  case CCValAssign::BCvt: {
3393  ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
3394  /*TODO: Kill=*/false);
3395  assert(ArgReg && "Failed to emit a bitcast!");
3396  ArgVT = VA.getLocVT();
3397  break;
3398  }
3399  case CCValAssign::VExt:
3400  // VExt has not been implemented, so this should be impossible to reach
3401  // for now. However, fall back to SelectionDAG isel once implemented.
3402  return false;
3403  case CCValAssign::AExtUpper:
3404  case CCValAssign::SExtUpper:
3405  case CCValAssign::ZExtUpper:
3406  case CCValAssign::FPExt:
3407  llvm_unreachable("Unexpected loc info!");
3408  case CCValAssign::Indirect:
3409  // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
3410  // support this.
3411  return false;
3412  }
3413 
3414  if (VA.isRegLoc()) {
3415  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3416  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3417  OutRegs.push_back(VA.getLocReg());
3418  } else {
3419  assert(VA.isMemLoc());
3420 
3421  // Don't emit stores for undef values.
3422  if (isa<UndefValue>(ArgVal))
3423  continue;
3424 
3425  unsigned LocMemOffset = VA.getLocMemOffset();
3426  X86AddressMode AM;
3427  AM.Base.Reg = RegInfo->getStackRegister();
3428  AM.Disp = LocMemOffset;
3429  ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
3430  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3431  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3432  MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
3433  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3434  if (Flags.isByVal()) {
3435  X86AddressMode SrcAM;
3436  SrcAM.Base.Reg = ArgReg;
3437  if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
3438  return false;
3439  } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
3440  // If this is a really simple value, emit this with the Value* version
3441  // of X86FastEmitStore. If it isn't simple, we don't want to do this,
3442  // as it can cause us to reevaluate the argument.
3443  if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
3444  return false;
3445  } else {
3446  bool ValIsKill = hasTrivialKill(ArgVal);
3447  if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
3448  return false;
3449  }
3450  }
3451  }
3452 
3453  // ELF / PIC requires the GOT pointer to be materialized in the EBX register
3454  // before function calls made via the PLT.
3455  if (Subtarget->isPICStyleGOT()) {
3456  unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3457  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3458  TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
3459  }
3460 
3461  if (Is64Bit && IsVarArg && !IsWin64) {
3462  // From AMD64 ABI document:
3463  // For calls that may call functions that use varargs or stdargs
3464  // (prototype-less calls or calls to functions containing ellipsis (...) in
3465  // the declaration) %al is used as hidden argument to specify the number
3466  // of SSE registers used. The contents of %al do not need to match exactly
3467  // the number of registers, but must be an upper bound on the number of SSE
3468  // registers used and is in the range 0 - 8 inclusive.
3469 
3470  // Count the number of XMM registers allocated.
3471  static const MCPhysReg XMMArgRegs[] = {
3472  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3473  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3474  };
3475  unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3476  assert((Subtarget->hasSSE1() || !NumXMMRegs)
3477  && "SSE registers cannot be used when SSE is disabled");
3478  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
3479  X86::AL).addImm(NumXMMRegs);
3480  }
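// Illustrative note, not from the LLVM source: for a SysV x86-64 variadic
// call such as printf("%f\n", x), the double travels in XMM0 and the block
// above emits "mov al, 1" (MOV8ri) immediately before the call, because AL
// must carry an upper bound on the number of XMM registers the call uses.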
3481 
3482  // Materialize callee address in a register. FIXME: GV address can be
3483  // handled with a CALLpcrel32 instead.
3484  X86AddressMode CalleeAM;
3485  if (!X86SelectCallAddress(Callee, CalleeAM))
3486  return false;
3487 
3488  unsigned CalleeOp = 0;
3489  const GlobalValue *GV = nullptr;
3490  if (CalleeAM.GV != nullptr) {
3491  GV = CalleeAM.GV;
3492  } else if (CalleeAM.Base.Reg != 0) {
3493  CalleeOp = CalleeAM.Base.Reg;
3494  } else
3495  return false;
3496 
3497  // Issue the call.
3498  MachineInstrBuilder MIB;
3499  if (CalleeOp) {
3500  // Register-indirect call.
3501  unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3502  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
3503  .addReg(CalleeOp);
3504  } else {
3505  // Direct call.
3506  assert(GV && "Not a direct call");
3507  // See if we need any target-specific flags on the GV operand.
3508  unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3509 
3510  // This will be a direct call, or an indirect call through memory for
3511  // NonLazyBind calls or dllimport calls.
3512  bool NeedLoad =
3513  OpFlags == X86II::MO_DLLIMPORT || OpFlags == X86II::MO_GOTPCREL;
3514  unsigned CallOpc = NeedLoad
3515  ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
3516  : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
3517 
3518  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
3519  if (NeedLoad)
3520  MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
3521  if (Symbol)
3522  MIB.addSym(Symbol, OpFlags);
3523  else
3524  MIB.addGlobalAddress(GV, 0, OpFlags);
3525  if (NeedLoad)
3526  MIB.addReg(0);
3527  }
3528 
3529  // Add a register mask operand representing the call-preserved registers.
3530  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3531  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3532 
3533  // Add an implicit use GOT pointer in EBX.
3534  if (Subtarget->isPICStyleGOT())
3535  MIB.addReg(X86::EBX, RegState::Implicit);
3536 
3537  if (Is64Bit && IsVarArg && !IsWin64)
3538  MIB.addReg(X86::AL, RegState::Implicit);
3539 
3540  // Add implicit physical register uses to the call.
3541  for (auto Reg : OutRegs)
3542  MIB.addReg(Reg, RegState::Implicit);
3543 
3544  // Issue CALLSEQ_END
3545  unsigned NumBytesForCalleeToPop =
3546  X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
3547  TM.Options.GuaranteedTailCallOpt)
3548  ? NumBytes // Callee pops everything.
3549  : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CS);
3550  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3551  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3552  .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
3553 
3554  // Now handle call return values.
3555  SmallVector<CCValAssign, 16> RVLocs;
3556  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
3557  CLI.RetTy->getContext());
3558  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3559 
3560  // Copy all of the result registers out of their specified physreg.
3561  unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3562  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3563  CCValAssign &VA = RVLocs[i];
3564  EVT CopyVT = VA.getValVT();
3565  unsigned CopyReg = ResultReg + i;
3566  unsigned SrcReg = VA.getLocReg();
3567 
3568  // If this is x86-64, and we disabled SSE, we can't return FP values
3569  if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
3570  ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3571  report_fatal_error("SSE register return with SSE disabled");
3572  }
3573 
3574  // If we prefer to use the value in xmm registers, copy it out as f80 and
3575  // use a truncate to move it from fp stack reg to xmm reg.
3576  if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
3577  isScalarFPTypeInSSEReg(VA.getValVT())) {
3578  CopyVT = MVT::f80;
3579  CopyReg = createResultReg(&X86::RFP80RegClass);
3580  }
3581 
3582  // Copy out the result.
3583  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3584  TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
3585  InRegs.push_back(VA.getLocReg());
3586 
3587  // Round the f80 to the right size, which also moves it to the appropriate
3588  // xmm register. This is accomplished by storing the f80 value in memory
3589  // and then loading it back.
3590  if (CopyVT != VA.getValVT()) {
3591  EVT ResVT = VA.getValVT();
3592  unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3593  unsigned MemSize = ResVT.getSizeInBits()/8;
3594  int FI = MFI.CreateStackObject(MemSize, MemSize, false);
3595  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3596  TII.get(Opc)), FI)
3597  .addReg(CopyReg);
3598  Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
3599  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3600  TII.get(Opc), ResultReg + i), FI);
3601  }
3602  }
3603 
3604  CLI.ResultReg = ResultReg;
3605  CLI.NumResultRegs = RVLocs.size();
3606  CLI.Call = MIB;
3607 
3608  return true;
3609 }
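// Illustrative summary, not from the LLVM source: for a simple direct call
// the routine above emits machine IR roughly along these lines (register
// names and virtual registers are schematic):
//
//   ADJCALLSTACKDOWN64 NumBytes, 0, 0
//   $edi = COPY %argvreg                  ; register arguments
//   MOV32mr $rsp, ..., %stackvreg         ; stack arguments, if any
//   CALL64pcrel32 @callee, <regmask>, implicit $edi, ...
//   ADJCALLSTACKUP64 NumBytes, NumBytesForCalleeToPop
//   %resvreg = COPY $eax                  ; copy out call results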
3610 
3611 bool
3612 X86FastISel::fastSelectInstruction(const Instruction *I) {
3613  switch (I->getOpcode()) {
3614  default: break;
3615  case Instruction::Load:
3616  return X86SelectLoad(I);
3617  case Instruction::Store:
3618  return X86SelectStore(I);
3619  case Instruction::Ret:
3620  return X86SelectRet(I);
3621  case Instruction::ICmp:
3622  case Instruction::FCmp:
3623  return X86SelectCmp(I);
3624  case Instruction::ZExt:
3625  return X86SelectZExt(I);
3626  case Instruction::SExt:
3627  return X86SelectSExt(I);
3628  case Instruction::Br:
3629  return X86SelectBranch(I);
3630  case Instruction::LShr:
3631  case Instruction::AShr:
3632  case Instruction::Shl:
3633  return X86SelectShift(I);
3634  case Instruction::SDiv:
3635  case Instruction::UDiv:
3636  case Instruction::SRem:
3637  case Instruction::URem:
3638  return X86SelectDivRem(I);
3639  case Instruction::Select:
3640  return X86SelectSelect(I);
3641  case Instruction::Trunc:
3642  return X86SelectTrunc(I);
3643  case Instruction::FPExt:
3644  return X86SelectFPExt(I);
3645  case Instruction::FPTrunc:
3646  return X86SelectFPTrunc(I);
3647  case Instruction::SIToFP:
3648  return X86SelectSIToFP(I);
3649  case Instruction::UIToFP:
3650  return X86SelectUIToFP(I);
3651  case Instruction::IntToPtr: // Deliberate fall-through.
3652  case Instruction::PtrToInt: {
3653  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3654  EVT DstVT = TLI.getValueType(DL, I->getType());
3655  if (DstVT.bitsGT(SrcVT))
3656  return X86SelectZExt(I);
3657  if (DstVT.bitsLT(SrcVT))
3658  return X86SelectTrunc(I);
3659  unsigned Reg = getRegForValue(I->getOperand(0));
3660  if (Reg == 0) return false;
3661  updateValueMap(I, Reg);
3662  return true;
3663  }
3664  case Instruction::BitCast: {
3665  // Select SSE2/AVX bitcasts between 128/256 bit vector types.
3666  if (!Subtarget->hasSSE2())
3667  return false;
3668 
3669  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3670  EVT DstVT = TLI.getValueType(DL, I->getType());
3671 
3672  if (!SrcVT.isSimple() || !DstVT.isSimple())
3673  return false;
3674 
3675  MVT SVT = SrcVT.getSimpleVT();
3676  MVT DVT = DstVT.getSimpleVT();
3677 
3678  if (!SVT.is128BitVector() &&
3679  !(Subtarget->hasAVX() && SVT.is256BitVector()) &&
3680  !(Subtarget->hasAVX512() && SVT.is512BitVector() &&
3681  (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 &&
3682  DVT.getScalarSizeInBits() >= 32))))
3683  return false;
3684 
3685  unsigned Reg = getRegForValue(I->getOperand(0));
3686  if (Reg == 0)
3687  return false;
3688 
3689  // No instruction is needed for conversion. Reuse the register used by
3690  // the first operand.
3691  updateValueMap(I, Reg);
3692  return true;
3693  }
3694  }
3695 
3696  return false;
3697 }
3698 
3699 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
3700  if (VT > MVT::i64)
3701  return 0;
3702 
3703  uint64_t Imm = CI->getZExtValue();
3704  if (Imm == 0) {
3705  unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
3706  switch (VT.SimpleTy) {
3707  default: llvm_unreachable("Unexpected value type");
3708  case MVT::i1:
3709  case MVT::i8:
3710  return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
3711  X86::sub_8bit);
3712  case MVT::i16:
3713  return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
3714  X86::sub_16bit);
3715  case MVT::i32:
3716  return SrcReg;
3717  case MVT::i64: {
3718  unsigned ResultReg = createResultReg(&X86::GR64RegClass);
3719  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3720  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3721  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3722  return ResultReg;
3723  }
3724  }
3725  }
3726 
3727  unsigned Opc = 0;
3728  switch (VT.SimpleTy) {
3729  default: llvm_unreachable("Unexpected value type");
3730  case MVT::i1:
3731  VT = MVT::i8;
3732  LLVM_FALLTHROUGH;
3733  case MVT::i8: Opc = X86::MOV8ri; break;
3734  case MVT::i16: Opc = X86::MOV16ri; break;
3735  case MVT::i32: Opc = X86::MOV32ri; break;
3736  case MVT::i64: {
3737  if (isUInt<32>(Imm))
3738  Opc = X86::MOV32ri64;
3739  else if (isInt<32>(Imm))
3740  Opc = X86::MOV64ri32;
3741  else
3742  Opc = X86::MOV64ri;
3743  break;
3744  }
3745  }
3746  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
3747 }
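// Illustrative sketch, not part of the LLVM source: the 64-bit immediate
// opcode choice made above, rewritten as a tiny standalone program. The
// helper name chooseMov64 is hypothetical; the opcode names mirror the
// switch above (MOV32ri64 for zero-extendable, MOV64ri32 for sign-extendable,
// MOV64ri for everything else).

#include <cstdint>
#include <cstdio>

static const char *chooseMov64(uint64_t Imm) {
  if (Imm <= UINT32_MAX)
    return "MOV32ri64";   // 32-bit move, implicitly zero-extended to 64 bits
  if ((int64_t)Imm >= INT32_MIN && (int64_t)Imm <= INT32_MAX)
    return "MOV64ri32";   // sign-extended 32-bit immediate form
  return "MOV64ri";       // full move with a 64-bit immediate
}

int main() {
  std::printf("0xFFFFFFFF  -> %s\n", chooseMov64(0xFFFFFFFFu));    // MOV32ri64
  std::printf("-1          -> %s\n", chooseMov64((uint64_t)-1));   // MOV64ri32
  std::printf("0x100000000 -> %s\n", chooseMov64(0x100000000ull)); // MOV64ri
}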
3748 
3749 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
3750  if (CFP->isNullValue())
3751  return fastMaterializeFloatZero(CFP);
3752 
3753  // Can't handle alternate code models yet.
3754  CodeModel::Model CM = TM.getCodeModel();
3755  if (CM != CodeModel::Small && CM != CodeModel::Large)
3756  return 0;
3757 
3758  // Get opcode and regclass of the output for the given load instruction.
3759  unsigned Opc = 0;
3760  const TargetRegisterClass *RC = nullptr;
3761  switch (VT.SimpleTy) {
3762  default: return 0;
3763  case MVT::f32:
3764  if (X86ScalarSSEf32) {
3765  Opc = Subtarget->hasAVX512()
3766  ? X86::VMOVSSZrm
3767  : Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
3768  RC = Subtarget->hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3769  } else {
3770  Opc = X86::LD_Fp32m;
3771  RC = &X86::RFP32RegClass;
3772  }
3773  break;
3774  case MVT::f64:
3775  if (X86ScalarSSEf64) {
3776  Opc = Subtarget->hasAVX512()
3777  ? X86::VMOVSDZrm
3778  : Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
3779  RC = Subtarget->hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3780  } else {
3781  Opc = X86::LD_Fp64m;
3782  RC = &X86::RFP64RegClass;
3783  }
3784  break;
3785  case MVT::f80:
3786  // No f80 support yet.
3787  return 0;
3788  }
3789 
3790  // MachineConstantPool wants an explicit alignment.
3791  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
3792  if (Align == 0) {
3793  // Alignment of vector types. FIXME!
3794  Align = DL.getTypeAllocSize(CFP->getType());
3795  }
3796 
3797  // x86-32 PIC requires a PIC base register for constant pools.
3798  unsigned PICBase = 0;
3799  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
3800  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
3801  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3802  else if (OpFlag == X86II::MO_GOTOFF)
3803  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3804  else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
3805  PICBase = X86::RIP;
3806 
3807  // Create the load from the constant pool.
3808  unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
3809  unsigned ResultReg = createResultReg(RC);
3810 
3811  if (CM == CodeModel::Large) {
3812  unsigned AddrReg = createResultReg(&X86::GR64RegClass);
3813  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3814  AddrReg)
3815  .addConstantPoolIndex(CPI, 0, OpFlag);
3816  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3817  TII.get(Opc), ResultReg);
3818  addDirectMem(MIB, AddrReg);
3819  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3820  MachinePointerInfo::getConstantPool(*FuncInfo.MF),
3821  MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
3822  MIB->addMemOperand(*FuncInfo.MF, MMO);
3823  return ResultReg;
3824  }
3825 
3826  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3827  TII.get(Opc), ResultReg),
3828  CPI, PICBase, OpFlag);
3829  return ResultReg;
3830 }
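// Illustrative note, not from the LLVM source: a scalar FP constant such as
// 3.14 is placed in the machine constant pool and loaded back with
// MOVSD/VMOVSD (or the x87 LD_Fp64m without SSE2). Under the small code model
// on x86-64 the load ends up RIP-relative, e.g. something like
// "movsd .LCPI0_0(%rip), %xmm0", while x86-32 PIC addresses the pool through
// the PIC base register selected above.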
3831 
3832 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
3833  // Can't handle alternate code models yet.
3834  if (TM.getCodeModel() != CodeModel::Small)
3835  return 0;
3836 
3837  // Materialize addresses with LEA/MOV instructions.
3838  X86AddressMode AM;
3839  if (X86SelectAddress(GV, AM)) {
3840  // If the expression is just a basereg, then we're done, otherwise we need
3841  // to emit an LEA.
3842  if (AM.BaseType == X86AddressMode::RegBase &&
3843  AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
3844  return AM.Base.Reg;
3845 
3846  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3847  if (TM.getRelocationModel() == Reloc::Static &&
3848  TLI.getPointerTy(DL) == MVT::i64) {
3849  // The displacement code could be more than 32 bits away so we need to use
3850  // an instruction with a 64 bit immediate
3851  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3852  ResultReg)
3853  .addGlobalAddress(GV);
3854  } else {
3855  unsigned Opc =
3856  TLI.getPointerTy(DL) == MVT::i32
3857  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3858  : X86::LEA64r;
3859  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3860  TII.get(Opc), ResultReg), AM);
3861  }
3862  return ResultReg;
3863  }
3864  return 0;
3865 }
3866 
3867 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
3868  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
3869 
3870  // Only handle simple types.
3871  if (!CEVT.isSimple())
3872  return 0;
3873  MVT VT = CEVT.getSimpleVT();
3874 
3875  if (const auto *CI = dyn_cast<ConstantInt>(C))
3876  return X86MaterializeInt(CI, VT);
3877  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
3878  return X86MaterializeFP(CFP, VT);
3879  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
3880  return X86MaterializeGV(GV, VT);
3881 
3882  return 0;
3883 }
3884 
3885 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
3886  // Fail on dynamic allocas. At this point, getRegForValue has already
3887  // checked its CSE maps, so if we're here trying to handle a dynamic
3888  // alloca, we're not going to succeed. X86SelectAddress has a
3889  // check for dynamic allocas, because it's called directly from
3890  // various places, but targetMaterializeAlloca also needs a check
3891  // in order to avoid recursion between getRegForValue,
3892  // X86SelectAddress, and targetMaterializeAlloca.
3893  if (!FuncInfo.StaticAllocaMap.count(C))
3894  return 0;
3895  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3896 
3897  X86AddressMode AM;
3898  if (!X86SelectAddress(C, AM))
3899  return 0;
3900  unsigned Opc =
3901  TLI.getPointerTy(DL) == MVT::i32
3902  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3903  : X86::LEA64r;
3904  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
3905  unsigned ResultReg = createResultReg(RC);
3906  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3907  TII.get(Opc), ResultReg), AM);
3908  return ResultReg;
3909 }
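// Illustrative note, not from the LLVM source: for a static alloca such as
//
//   %buf = alloca [16 x i32]
//
// the code above emits a single LEA (LEA64r on x86-64, LEA32r/LEA64_32r
// otherwise) of the fixed frame slot into a fresh virtual register, which
// later uses of %buf then pick up through the value map.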
3910 
3911 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
3912  MVT VT;
3913  if (!isTypeLegal(CF->getType(), VT))
3914  return 0;
3915 
3916  // Get opcode and regclass for the given zero.
3917  bool HasAVX512 = Subtarget->hasAVX512();
3918  unsigned Opc = 0;
3919  const TargetRegisterClass *RC = nullptr;
3920  switch (VT.SimpleTy) {
3921  default: return 0;
3922  case MVT::f32:
3923  if (X86ScalarSSEf32) {
3924  Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
3925  RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
3926  } else {
3927  Opc = X86::LD_Fp032;
3928  RC = &X86::RFP32RegClass;
3929  }
3930  break;
3931  case MVT::f64:
3932  if (X86ScalarSSEf64) {
3933  Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
3934  RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
3935  } else {
3936  Opc = X86::LD_Fp064;
3937  RC = &X86::RFP64RegClass;
3938  }
3939  break;
3940  case MVT::f80:
3941  // No f80 support yet.
3942  return 0;
3943  }
3944 
3945  unsigned ResultReg = createResultReg(RC);
3946  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
3947  return ResultReg;
3948 }
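// Illustrative note, not from the LLVM source: FsFLD0SS/FsFLD0SD (and the
// AVX-512 variants) are pseudo-instructions that are expanded later into a
// self-xor of the destination XMM register (e.g. "xorps %xmm0, %xmm0"), the
// usual way to get +0.0 without touching memory; without SSE the x87
// LD_Fp032/LD_Fp064 forms are used instead.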
3949 
3950 
3951 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3952  const LoadInst *LI) {
3953  const Value *Ptr = LI->getPointerOperand();
3954  X86AddressMode AM;
3955  if (!X86SelectAddress(Ptr, AM))
3956  return false;
3957 
3958  const X86InstrInfo &XII = (const X86InstrInfo &)TII;
3959 
3960  unsigned Size = DL.getTypeAllocSize(LI->getType());
3961  unsigned Alignment = LI->getAlignment();
3962 
3963  if (Alignment == 0) // Ensure that codegen never sees alignment 0
3964  Alignment = DL.getABITypeAlignment(LI->getType());
3965 
3966  SmallVector<MachineOperand, 8> AddrOps;
3967  AM.getFullAddress(AddrOps);
3968 
3969  MachineInstr *Result = XII.foldMemoryOperandImpl(
3970  *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
3971  /*AllowCommute=*/true);
3972  if (!Result)
3973  return false;
3974 
3975  // The index register could be in the wrong register class. Unfortunately,
3976  // foldMemoryOperandImpl could have commuted the instruction so it's not enough
3977  // to just look at OpNo + the offset to the index reg. We actually need to
3978  // scan the instruction to find the index reg and see if it's the correct reg
3979  // class.
3980  unsigned OperandNo = 0;
3981  for (MachineInstr::mop_iterator I = Result->operands_begin(),
3982  E = Result->operands_end(); I != E; ++I, ++OperandNo) {
3983  MachineOperand &MO = *I;
3984  if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
3985  continue;
3986  // Found the index reg, now try to rewrite it.
3987  unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
3988  MO.getReg(), OperandNo);
3989  if (IndexReg == MO.getReg())
3990  continue;
3991  MO.setReg(IndexReg);
3992  }
3993 
3994  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
3995  MachineBasicBlock::iterator I(MI);
3996  removeDeadCode(I, std::next(I));
3997  return true;
3998 }
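// Illustrative note, not from the LLVM source: a typical fold performed here
// turns the pair
//
//   %v = MOV64rm <addr>       ; load with a single use
//   %r = ADD64rr %x, %v
//
// into the memory-operand form "%r = ADD64rm %x, <addr>", after which the
// now-redundant register-form instruction is deleted by removeDeadCode above.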
3999 
4000 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
4001  const TargetRegisterClass *RC,
4002  unsigned Op0, bool Op0IsKill,
4003  unsigned Op1, bool Op1IsKill,
4004  unsigned Op2, bool Op2IsKill,
4005  unsigned Op3, bool Op3IsKill) {
4006  const MCInstrDesc &II = TII.get(MachineInstOpcode);
4007 
4008  unsigned ResultReg = createResultReg(RC);
4009  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
4010  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
4011  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
4012  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
4013 
4014  if (II.getNumDefs() >= 1)
4015  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
4016  .addReg(Op0, getKillRegState(Op0IsKill))
4017  .addReg(Op1, getKillRegState(Op1IsKill))
4018  .addReg(Op2, getKillRegState(Op2IsKill))
4019  .addReg(Op3, getKillRegState(Op3IsKill));
4020  else {
4021  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
4022  .addReg(Op0, getKillRegState(Op0IsKill))
4023  .addReg(Op1, getKillRegState(Op1IsKill))
4024  .addReg(Op2, getKillRegState(Op2IsKill))
4025  .addReg(Op3, getKillRegState(Op3IsKill));
4026  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4027  TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
4028  }
4029  return ResultReg;
4030 }
4031 
4032 
4033 namespace llvm {
4034  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
4035  const TargetLibraryInfo *libInfo) {
4036  return new X86FastISel(funcInfo, libInfo);
4037  }
4038 }