LLVM  8.0.1
X86ISelDAGToDAG.cpp
1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a DAG pattern matching instruction selector for X86,
11 // converting from a legalized dag to an X86 dag.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86MachineFunctionInfo.h"
17 #include "X86RegisterInfo.h"
18 #include "X86Subtarget.h"
19 #include "X86TargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Config/llvm-config.h"
25 #include "llvm/IR/ConstantRange.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/Instructions.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include "llvm/IR/Type.h"
30 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/KnownBits.h"
37 #include <stdint.h>
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "x86-isel"
41 
42 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
43 
44 static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
45  cl::desc("Enable setting constant bits to reduce size of mask immediates"),
46  cl::Hidden);
47 
48 //===----------------------------------------------------------------------===//
49 // Pattern Matcher Implementation
50 //===----------------------------------------------------------------------===//
51 
52 namespace {
53  /// This corresponds to X86AddressMode, but uses SDValue's instead of register
54  /// numbers for the leaves of the matched tree.
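 /// For example, the address in "movl 4(%rbx,%rcx,8), %eax" corresponds to
 /// Base_Reg = RBX, Scale = 8, IndexReg = RCX, Disp = 4 and no Segment.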
55  struct X86ISelAddressMode {
56  enum {
57  RegBase,
58  FrameIndexBase
59  } BaseType;
60 
61  // This is really a union, discriminated by BaseType!
62  SDValue Base_Reg;
63  int Base_FrameIndex;
64 
65  unsigned Scale;
66  SDValue IndexReg;
67  int32_t Disp;
68  SDValue Segment;
69  const GlobalValue *GV;
70  const Constant *CP;
71  const BlockAddress *BlockAddr;
72  const char *ES;
73  MCSymbol *MCSym;
74  int JT;
75  unsigned Align; // CP alignment.
76  unsigned char SymbolFlags; // X86II::MO_*
77 
78  X86ISelAddressMode()
79  : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
80  Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
81  MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}
82 
83  bool hasSymbolicDisplacement() const {
84  return GV != nullptr || CP != nullptr || ES != nullptr ||
85  MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
86  }
87 
88  bool hasBaseOrIndexReg() const {
89  return BaseType == FrameIndexBase ||
90  IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
91  }
92 
93  /// Return true if this addressing mode is already RIP-relative.
94  bool isRIPRelative() const {
95  if (BaseType != RegBase) return false;
96  if (RegisterSDNode *RegNode =
97  dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
98  return RegNode->getReg() == X86::RIP;
99  return false;
100  }
101 
102  void setBaseReg(SDValue Reg) {
103  BaseType = RegBase;
104  Base_Reg = Reg;
105  }
106 
107 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
108  void dump(SelectionDAG *DAG = nullptr) {
109  dbgs() << "X86ISelAddressMode " << this << '\n';
110  dbgs() << "Base_Reg ";
111  if (Base_Reg.getNode())
112  Base_Reg.getNode()->dump(DAG);
113  else
114  dbgs() << "nul\n";
115  if (BaseType == FrameIndexBase)
116  dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
117  dbgs() << " Scale " << Scale << '\n'
118  << "IndexReg ";
119  if (IndexReg.getNode())
120  IndexReg.getNode()->dump(DAG);
121  else
122  dbgs() << "nul\n";
123  dbgs() << " Disp " << Disp << '\n'
124  << "GV ";
125  if (GV)
126  GV->dump();
127  else
128  dbgs() << "nul";
129  dbgs() << " CP ";
130  if (CP)
131  CP->dump();
132  else
133  dbgs() << "nul";
134  dbgs() << '\n'
135  << "ES ";
136  if (ES)
137  dbgs() << ES;
138  else
139  dbgs() << "nul";
140  dbgs() << " MCSym ";
141  if (MCSym)
142  dbgs() << MCSym;
143  else
144  dbgs() << "nul";
145  dbgs() << " JT" << JT << " Align" << Align << '\n';
146  }
147 #endif
148  };
149 }
150 
151 namespace {
152  //===--------------------------------------------------------------------===//
153  /// ISel - X86-specific code to select X86 machine instructions for
154  /// SelectionDAG operations.
155  ///
156  class X86DAGToDAGISel final : public SelectionDAGISel {
157  /// Keep a pointer to the X86Subtarget around so that we can
158  /// make the right decision when generating code for different targets.
159  const X86Subtarget *Subtarget;
160 
161  /// If true, selector should try to optimize for code size instead of
162  /// performance.
163  bool OptForSize;
164 
165  /// If true, selector should try to optimize for minimum code size.
166  bool OptForMinSize;
167 
168  /// Disable direct TLS access through segment registers.
169  bool IndirectTlsSegRefs;
170 
171  public:
172  explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
173  : SelectionDAGISel(tm, OptLevel), OptForSize(false),
174  OptForMinSize(false) {}
175 
176  StringRef getPassName() const override {
177  return "X86 DAG->DAG Instruction Selection";
178  }
179 
180  bool runOnMachineFunction(MachineFunction &MF) override {
181  // Reset the subtarget each time through.
182  Subtarget = &MF.getSubtarget<X86Subtarget>();
183  IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
184  "indirect-tls-seg-refs");
185  SelectionDAGISel::runOnMachineFunction(MF);
186  return true;
187  }
188 
189  void EmitFunctionEntryCode() override;
190 
191  bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
192 
193  void PreprocessISelDAG() override;
194  void PostprocessISelDAG() override;
195 
196 // Include the pieces autogenerated from the target description.
197 #include "X86GenDAGISel.inc"
198 
199  private:
200  void Select(SDNode *N) override;
201 
202  bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
203  bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
204  bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
205  bool matchAddress(SDValue N, X86ISelAddressMode &AM);
206  bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
207  bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
208  bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
209  unsigned Depth);
210  bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
211  bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
212  SDValue &Scale, SDValue &Index, SDValue &Disp,
213  SDValue &Segment);
214  bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
215  SDValue &Scale, SDValue &Index, SDValue &Disp,
216  SDValue &Segment);
217  bool selectMOV64Imm32(SDValue N, SDValue &Imm);
218  bool selectLEAAddr(SDValue N, SDValue &Base,
219  SDValue &Scale, SDValue &Index, SDValue &Disp,
220  SDValue &Segment);
221  bool selectLEA64_32Addr(SDValue N, SDValue &Base,
222  SDValue &Scale, SDValue &Index, SDValue &Disp,
223  SDValue &Segment);
224  bool selectTLSADDRAddr(SDValue N, SDValue &Base,
225  SDValue &Scale, SDValue &Index, SDValue &Disp,
226  SDValue &Segment);
227  bool selectScalarSSELoad(SDNode *Root, SDNode *Parent, SDValue N,
228  SDValue &Base, SDValue &Scale,
229  SDValue &Index, SDValue &Disp,
230  SDValue &Segment,
231  SDValue &NodeWithChain);
232  bool selectRelocImm(SDValue N, SDValue &Op);
233 
234  bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
235  SDValue &Base, SDValue &Scale,
236  SDValue &Index, SDValue &Disp,
237  SDValue &Segment);
238 
239  // Convenience method where P is also root.
240  bool tryFoldLoad(SDNode *P, SDValue N,
241  SDValue &Base, SDValue &Scale,
242  SDValue &Index, SDValue &Disp,
243  SDValue &Segment) {
244  return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
245  }
246 
247  /// Implement addressing mode selection for inline asm expressions.
248  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
249  unsigned ConstraintID,
250  std::vector<SDValue> &OutOps) override;
251 
252  void emitSpecialCodeForMain();
253 
254  inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
255  SDValue &Base, SDValue &Scale,
256  SDValue &Index, SDValue &Disp,
257  SDValue &Segment) {
258  Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
259  ? CurDAG->getTargetFrameIndex(
260  AM.Base_FrameIndex,
261  TLI->getPointerTy(CurDAG->getDataLayout()))
262  : AM.Base_Reg;
263  Scale = getI8Imm(AM.Scale, DL);
264  Index = AM.IndexReg;
265  // These are 32-bit even in 64-bit mode since RIP-relative offset
266  // is 32-bit.
267  if (AM.GV)
268  Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
269  MVT::i32, AM.Disp,
270  AM.SymbolFlags);
271  else if (AM.CP)
272  Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
273  AM.Align, AM.Disp, AM.SymbolFlags);
274  else if (AM.ES) {
275  assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
276  Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
277  } else if (AM.MCSym) {
278  assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
279  assert(AM.SymbolFlags == 0 && "oo");
280  Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
281  } else if (AM.JT != -1) {
282  assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
283  Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
284  } else if (AM.BlockAddr)
285  Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
286  AM.SymbolFlags);
287  else
288  Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);
289 
290  if (AM.Segment.getNode())
291  Segment = AM.Segment;
292  else
293  Segment = CurDAG->getRegister(0, MVT::i32);
294  }
295 
296  // Utility function to determine whether we should avoid selecting
297  // immediate forms of instructions for better code size or not.
298  // At a high level, we'd like to avoid such instructions when
299  // we have similar constants used within the same basic block
300  // that can be kept in a register.
301  //
302  bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
303  uint32_t UseCount = 0;
304 
305  // Do not want to hoist if we're not optimizing for size.
306  // TODO: We'd like to remove this restriction.
307  // See the comment in X86InstrInfo.td for more info.
308  if (!OptForSize)
309  return false;
310 
311  // Walk all the users of the immediate.
312  for (SDNode::use_iterator UI = N->use_begin(),
313  UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
314 
315  SDNode *User = *UI;
316 
317  // This user is already selected. Count it as a legitimate use and
318  // move on.
319  if (User->isMachineOpcode()) {
320  UseCount++;
321  continue;
322  }
323 
324  // We want to count stores of immediates as real uses.
325  if (User->getOpcode() == ISD::STORE &&
326  User->getOperand(1).getNode() == N) {
327  UseCount++;
328  continue;
329  }
330 
331  // We don't currently match users that have > 2 operands (except
332  // for stores, which are handled above)
333  // Those instruction won't match in ISEL, for now, and would
334  // be counted incorrectly.
335  // This may change in the future as we add additional instruction
336  // types.
337  if (User->getNumOperands() != 2)
338  continue;
339 
340  // Immediates that are used for offsets as part of stack
341  // manipulation should be left alone. These are typically
342  // used to indicate SP offsets for argument passing and
343  // will get pulled into stores/pushes (implicitly).
344  if (User->getOpcode() == X86ISD::ADD ||
345  User->getOpcode() == ISD::ADD ||
346  User->getOpcode() == X86ISD::SUB ||
347  User->getOpcode() == ISD::SUB) {
348 
349  // Find the other operand of the add/sub.
350  SDValue OtherOp = User->getOperand(0);
351  if (OtherOp.getNode() == N)
352  OtherOp = User->getOperand(1);
353 
354  // Don't count if the other operand is SP.
355  RegisterSDNode *RegNode;
356  if (OtherOp->getOpcode() == ISD::CopyFromReg &&
357  (RegNode = dyn_cast_or_null<RegisterSDNode>(
358  OtherOp->getOperand(1).getNode())))
359  if ((RegNode->getReg() == X86::ESP) ||
360  (RegNode->getReg() == X86::RSP))
361  continue;
362  }
363 
364  // ... otherwise, count this and move on.
365  UseCount++;
366  }
367 
368  // If we have more than 1 use, then recommend for hoisting.
369  return (UseCount > 1);
370  }
371 
372  /// Return a target constant with the specified value of type i8.
373  inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
374  return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
375  }
376 
377  /// Return a target constant with the specified value, of type i32.
378  inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
379  return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
380  }
381 
382  /// Return a target constant with the specified value, of type i64.
383  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
384  return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
385  }
386 
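  // The two helpers below convert the element index of an EXTRACT_SUBVECTOR or
  // INSERT_SUBVECTOR into the subvector-index immediate expected by the
  // VEXTRACT*/VINSERT* instructions. For example, extracting a 128-bit
  // subvector starting at element 4 of a v8i32 gives (4 * 32) / 128 = 1.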
387  SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
388  const SDLoc &DL) {
389  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
390  uint64_t Index = N->getConstantOperandVal(1);
391  MVT VecVT = N->getOperand(0).getSimpleValueType();
392  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
393  }
394 
395  SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
396  const SDLoc &DL) {
397  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
398  uint64_t Index = N->getConstantOperandVal(2);
399  MVT VecVT = N->getSimpleValueType(0);
400  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
401  }
402 
403  /// Return an SDNode that returns the value of the global base register.
404  /// Output instructions required to initialize the global base register,
405  /// if necessary.
406  SDNode *getGlobalBaseReg();
407 
408  /// Return a reference to the TargetMachine, casted to the target-specific
409  /// type.
410  const X86TargetMachine &getTargetMachine() const {
411  return static_cast<const X86TargetMachine &>(TM);
412  }
413 
414  /// Return a reference to the TargetInstrInfo, casted to the target-specific
415  /// type.
416  const X86InstrInfo *getInstrInfo() const {
417  return Subtarget->getInstrInfo();
418  }
419 
420  /// Address-mode matching performs shift-of-and to and-of-shift
421  /// reassociation in order to expose more scaled addressing
422  /// opportunities.
423  bool ComplexPatternFuncMutatesDAG() const override {
424  return true;
425  }
426 
427  bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
428 
429  /// Returns whether this is a relocatable immediate in the range
430  /// [-2^(Width-1) .. 2^(Width-1)-1].
431  template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
432  if (auto *CN = dyn_cast<ConstantSDNode>(N))
433  return isInt<Width>(CN->getSExtValue());
434  return isSExtAbsoluteSymbolRef(Width, N);
435  }
436 
437  // Indicates we should prefer to use a non-temporal load for this load.
438  bool useNonTemporalLoad(LoadSDNode *N) const {
439  if (!N->isNonTemporal())
440  return false;
441 
442  unsigned StoreSize = N->getMemoryVT().getStoreSize();
443 
444  if (N->getAlignment() < StoreSize)
445  return false;
446 
447  switch (StoreSize) {
448  default: llvm_unreachable("Unsupported store size");
449  case 4:
450  case 8:
451  return false;
452  case 16:
453  return Subtarget->hasSSE41();
454  case 32:
455  return Subtarget->hasAVX2();
456  case 64:
457  return Subtarget->hasAVX512();
458  }
459  }
460 
461  bool foldLoadStoreIntoMemOperand(SDNode *Node);
462  MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
463  bool matchBitExtract(SDNode *Node);
464  bool shrinkAndImmediate(SDNode *N);
465  bool isMaskZeroExtended(SDNode *N) const;
466  bool tryShiftAmountMod(SDNode *N);
467 
468  MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
469  const SDLoc &dl, MVT VT, SDNode *Node);
470  MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
471  const SDLoc &dl, MVT VT, SDNode *Node,
472  SDValue &InFlag);
473 
474  bool tryOptimizeRem8Extend(SDNode *N);
475 
476  bool onlyUsesZeroFlag(SDValue Flags) const;
477  bool hasNoSignFlagUses(SDValue Flags) const;
478  bool hasNoCarryFlagUses(SDValue Flags) const;
479  };
480 }
481 
482 
483 // Returns true if this masked compare can be implemented legally with this
484 // type.
485 static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
486  unsigned Opcode = N->getOpcode();
487  if (Opcode == X86ISD::CMPM || Opcode == ISD::SETCC ||
488  Opcode == X86ISD::CMPM_RND || Opcode == X86ISD::VFPCLASS) {
489  // We can get 256-bit 8 element types here without VLX being enabled. When
490  // this happens we will use 512-bit operations and the mask will not be
491  // zero extended.
492  EVT OpVT = N->getOperand(0).getValueType();
493  if (OpVT.is256BitVector() || OpVT.is128BitVector())
494  return Subtarget->hasVLX();
495 
496  return true;
497  }
498  // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
499  if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
500  Opcode == X86ISD::FSETCCM_RND)
501  return true;
502 
503  return false;
504 }
505 
506 // Returns true if we can assume the writer of the mask has zero extended it
507 // for us.
508 bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
509  // If this is an AND, check if we have a compare on either side. As long as
510  // one side guarantees the mask is zero extended, the AND will preserve those
511  // zeros.
512  if (N->getOpcode() == ISD::AND)
513  return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
514  isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
515 
516  return isLegalMaskCompare(N, Subtarget);
517 }
518 
519 bool
520 X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
521  if (OptLevel == CodeGenOpt::None) return false;
522 
523  if (!N.hasOneUse())
524  return false;
525 
526  if (N.getOpcode() != ISD::LOAD)
527  return true;
528 
529  // Don't fold non-temporal loads if we have an instruction for them.
530  if (useNonTemporalLoad(cast<LoadSDNode>(N)))
531  return false;
532 
533  // If N is a load, do additional profitability checks.
534  if (U == Root) {
535  switch (U->getOpcode()) {
536  default: break;
537  case X86ISD::ADD:
538  case X86ISD::ADC:
539  case X86ISD::SUB:
540  case X86ISD::SBB:
541  case X86ISD::AND:
542  case X86ISD::XOR:
543  case X86ISD::OR:
544  case ISD::ADD:
545  case ISD::ADDCARRY:
546  case ISD::AND:
547  case ISD::OR:
548  case ISD::XOR: {
549  SDValue Op1 = U->getOperand(1);
550 
551  // If the other operand is an 8-bit immediate we should fold the immediate
552  // instead. This reduces code size.
553  // e.g.
554  // movl 4(%esp), %eax
555  // addl $4, %eax
556  // vs.
557  // movl $4, %eax
558  // addl 4(%esp), %eax
559  // The former is 2 bytes shorter. In the case where the increment is 1,
560  // the saving can be 4 bytes (by using incl %eax).
561  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) {
562  if (Imm->getAPIntValue().isSignedIntN(8))
563  return false;
564 
565  // If this is a 64-bit AND with an immediate that fits in 32-bits,
566  // prefer using the smaller and over folding the load. This is needed to
567  // make sure immediates created by shrinkAndImmediate are always folded.
568  // Ideally we would narrow the load during DAG combine and get the
569  // best of both worlds.
570  if (U->getOpcode() == ISD::AND &&
571  Imm->getAPIntValue().getBitWidth() == 64 &&
572  Imm->getAPIntValue().isIntN(32))
573  return false;
574  }
575 
576  // If the other operand is a TLS address, we should fold it instead.
577  // This produces
578  // movl %gs:0, %eax
579  // leal i@NTPOFF(%eax), %eax
580  // instead of
581  // movl $i@NTPOFF, %eax
582  // addl %gs:0, %eax
583  // if the block also has an access to a second TLS address this will save
584  // a load.
585  // FIXME: This is probably also true for non-TLS addresses.
586  if (Op1.getOpcode() == X86ISD::Wrapper) {
587  SDValue Val = Op1.getOperand(0);
588  if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
589  return false;
590  }
591 
592  // Don't fold load if this matches the BTS/BTR/BTC patterns.
593  // BTS: (or X, (shl 1, n))
594  // BTR: (and X, (rotl -2, n))
595  // BTC: (xor X, (shl 1, n))
596  if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
597  if (U->getOperand(0).getOpcode() == ISD::SHL &&
598  isOneConstant(U->getOperand(0).getOperand(0)))
599  return false;
600 
601  if (U->getOperand(1).getOpcode() == ISD::SHL &&
602  isOneConstant(U->getOperand(1).getOperand(0)))
603  return false;
604  }
605  if (U->getOpcode() == ISD::AND) {
606  SDValue U0 = U->getOperand(0);
607  SDValue U1 = U->getOperand(1);
608  if (U0.getOpcode() == ISD::ROTL) {
609  auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
610  if (C && C->getSExtValue() == -2)
611  return false;
612  }
613 
614  if (U1.getOpcode() == ISD::ROTL) {
615  auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
616  if (C && C->getSExtValue() == -2)
617  return false;
618  }
619  }
620 
621  break;
622  }
623  case ISD::SHL:
624  case ISD::SRA:
625  case ISD::SRL:
626  // Don't fold a load into a shift by immediate. The BMI2 instructions
627  // support folding a load, but not an immediate. The legacy instructions
628  // support folding an immediate, but can't fold a load. Folding an
629  // immediate is preferable to folding a load.
630  if (isa<ConstantSDNode>(U->getOperand(1)))
631  return false;
632 
633  break;
634  }
635  }
636 
637  // Prevent folding a load if this can be implemented with an insert_subreg or
638  // a move that implicitly zeroes.
639  if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
640  isNullConstant(Root->getOperand(2)) &&
641  (Root->getOperand(0).isUndef() ||
642  ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
643  return false;
644 
645  return true;
646 }
647 
648 /// Replace the original chain operand of the call with
649 /// load's chain operand and move load below the call's chain operand.
650 static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
651  SDValue Call, SDValue OrigChain) {
652  SmallVector<SDValue, 8> Ops;
653  SDValue Chain = OrigChain.getOperand(0);
654  if (Chain.getNode() == Load.getNode())
655  Ops.push_back(Load.getOperand(0));
656  else {
657  assert(Chain.getOpcode() == ISD::TokenFactor &&
658  "Unexpected chain operand");
659  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
660  if (Chain.getOperand(i).getNode() == Load.getNode())
661  Ops.push_back(Load.getOperand(0));
662  else
663  Ops.push_back(Chain.getOperand(i));
664  SDValue NewChain =
665  CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
666  Ops.clear();
667  Ops.push_back(NewChain);
668  }
669  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
670  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
671  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
672  Load.getOperand(1), Load.getOperand(2));
673 
674  Ops.clear();
675  Ops.push_back(SDValue(Load.getNode(), 1));
676  Ops.append(Call->op_begin() + 1, Call->op_end());
677  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
678 }
679 
680 /// Return true if call address is a load and it can be
681 /// moved below CALLSEQ_START and the chains leading up to the call.
682 /// Return the CALLSEQ_START by reference as a second output.
683 /// In the case of a tail call, there isn't a callseq node between the call
684 /// chain and the load.
685 static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
686  // The transformation is somewhat dangerous if the call's chain was glued to
687  // the call. After MoveBelowOrigChain the load is moved between the call and
688  // the chain, this can create a cycle if the load is not folded. So it is
689  // *really* important that we are sure the load will be folded.
690  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
691  return false;
692  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
693  if (!LD ||
694  LD->isVolatile() ||
695  LD->getAddressingMode() != ISD::UNINDEXED ||
696  LD->getExtensionType() != ISD::NON_EXTLOAD)
697  return false;
698 
699  // Now let's find the callseq_start.
700  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
701  if (!Chain.hasOneUse())
702  return false;
703  Chain = Chain.getOperand(0);
704  }
705 
706  if (!Chain.getNumOperands())
707  return false;
708  // Since we are not checking for AA here, conservatively abort if the chain
709  // writes to memory. It's not safe to move the callee (a load) across a store.
710  if (isa<MemSDNode>(Chain.getNode()) &&
711  cast<MemSDNode>(Chain.getNode())->writeMem())
712  return false;
713  if (Chain.getOperand(0).getNode() == Callee.getNode())
714  return true;
715  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
716  Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
717  Callee.getValue(1).hasOneUse())
718  return true;
719  return false;
720 }
721 
722 void X86DAGToDAGISel::PreprocessISelDAG() {
723  // OptFor[Min]Size are used in pattern predicates that isel is matching.
724  OptForSize = MF->getFunction().optForSize();
725  OptForMinSize = MF->getFunction().optForMinSize();
726  assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");
727 
728  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
729  E = CurDAG->allnodes_end(); I != E; ) {
730  SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
731 
732  // If this is a target specific AND node with no flag usages, turn it back
733  // into ISD::AND to enable test instruction matching.
734  if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
735  SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
736  N->getOperand(0), N->getOperand(1));
737  --I;
738  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
739  ++I;
740  CurDAG->DeleteNode(N);
741  continue;
742  }
743 
744  if (OptLevel != CodeGenOpt::None &&
745  // Only do this when the target can fold the load into the call or
746  // jmp.
747  !Subtarget->useRetpolineIndirectCalls() &&
748  ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
749  (N->getOpcode() == X86ISD::TC_RETURN &&
750  (Subtarget->is64Bit() ||
751  !getTargetMachine().isPositionIndependent())))) {
752  /// Also try moving call address load from outside callseq_start to just
753  /// before the call to allow it to be folded.
754  ///
755  /// [Load chain]
756  /// ^
757  /// |
758  /// [Load]
759  /// ^ ^
760  /// | |
761  /// / \--
762  /// / |
763  ///[CALLSEQ_START] |
764  /// ^ |
765  /// | |
766  /// [LOAD/C2Reg] |
767  /// | |
768  /// \ /
769  /// \ /
770  /// [CALL]
771  bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
772  SDValue Chain = N->getOperand(0);
773  SDValue Load = N->getOperand(1);
774  if (!isCalleeLoad(Load, Chain, HasCallSeq))
775  continue;
776  moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
777  ++NumLoadMoved;
778  continue;
779  }
780 
781  // Lower fpround and fpextend nodes that target the FP stack to be store and
782  // load to the stack. This is a gross hack. We would like to simply mark
783  // these as being illegal, but when we do that, legalize produces these when
784  // it expands calls, then expands these in the same legalize pass. We would
785  // like dag combine to be able to hack on these between the call expansion
786  // and the node legalization. As such this pass basically does "really
787  // late" legalization of these inline with the X86 isel pass.
788  // FIXME: This should only happen when not compiled with -O0.
789  if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
790  continue;
791 
792  MVT SrcVT = N->getOperand(0).getSimpleValueType();
793  MVT DstVT = N->getSimpleValueType(0);
794 
795  // If any of the sources are vectors, no fp stack involved.
796  if (SrcVT.isVector() || DstVT.isVector())
797  continue;
798 
799  // If the source and destination are SSE registers, then this is a legal
800  // conversion that should not be lowered.
801  const X86TargetLowering *X86Lowering =
802  static_cast<const X86TargetLowering *>(TLI);
803  bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
804  bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
805  if (SrcIsSSE && DstIsSSE)
806  continue;
807 
808  if (!SrcIsSSE && !DstIsSSE) {
809  // If this is an FPStack extension, it is a noop.
810  if (N->getOpcode() == ISD::FP_EXTEND)
811  continue;
812  // If this is a value-preserving FPStack truncation, it is a noop.
813  if (N->getConstantOperandVal(1))
814  continue;
815  }
816 
817  // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
818  // FPStack has extload and truncstore. SSE can fold direct loads into other
819  // operations. Based on this, decide what we want to do.
820  MVT MemVT;
821  if (N->getOpcode() == ISD::FP_ROUND)
822  MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
823  else
824  MemVT = SrcIsSSE ? SrcVT : DstVT;
825 
826  SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
827  SDLoc dl(N);
828 
829  // FIXME: optimize the case where the src/dest is a load or store?
830  SDValue Store =
831  CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
832  MemTmp, MachinePointerInfo(), MemVT);
833  SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
834  MachinePointerInfo(), MemVT);
835 
836  // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
837  // extload we created. This will cause general havoc on the dag because
838  // anything below the conversion could be folded into other existing nodes.
839  // To avoid invalidating 'I', back it up to the convert node.
840  --I;
841  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
842 
843  // Now that we did that, the node is dead. Increment the iterator to the
844  // next node to process, then delete N.
845  ++I;
846  CurDAG->DeleteNode(N);
847  }
848 }
849 
850 // Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
851 bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
852  unsigned Opc = N->getMachineOpcode();
853  if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
854  Opc != X86::MOVSX64rr8)
855  return false;
856 
857  SDValue N0 = N->getOperand(0);
858 
859  // We need to be extracting the lower bit of an extend.
860  if (!N0.isMachineOpcode() ||
861  N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
862  N0.getConstantOperandVal(1) != X86::sub_8bit)
863  return false;
864 
865  // We're looking for either a movsx or movzx to match the original opcode.
866  unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
867  : X86::MOVSX32rr8_NOREX;
868  SDValue N00 = N0.getOperand(0);
869  if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
870  return false;
871 
872  if (Opc == X86::MOVSX64rr8) {
873  // If we had a sign extend from 8 to 64 bits, we still need to go from 32
874  // to 64.
875  MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
876  MVT::i64, N00);
877  ReplaceUses(N, Extend);
878  } else {
879  // Ok we can drop this extend and just use the original extend.
880  ReplaceUses(N, N00.getNode());
881  }
882 
883  return true;
884 }
885 
886 void X86DAGToDAGISel::PostprocessISelDAG() {
887  // Skip peepholes at -O0.
888  if (TM.getOptLevel() == CodeGenOpt::None)
889  return;
890 
891  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
892 
893  bool MadeChange = false;
894  while (Position != CurDAG->allnodes_begin()) {
895  SDNode *N = &*--Position;
896  // Skip dead nodes and any non-machine opcodes.
897  if (N->use_empty() || !N->isMachineOpcode())
898  continue;
899 
900  if (tryOptimizeRem8Extend(N)) {
901  MadeChange = true;
902  continue;
903  }
904 
905  // Look for a TESTrr+ANDrr pattern where both operands of the test are
906  // the same. Rewrite to remove the AND.
907  unsigned Opc = N->getMachineOpcode();
908  if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
909  Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
910  N->getOperand(0) == N->getOperand(1) &&
911  N->isOnlyUserOf(N->getOperand(0).getNode()) &&
912  N->getOperand(0).isMachineOpcode()) {
913  SDValue And = N->getOperand(0);
914  unsigned N0Opc = And.getMachineOpcode();
915  if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
916  N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
917  MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N),
918  MVT::i32,
919  And.getOperand(0),
920  And.getOperand(1));
921  ReplaceUses(N, Test);
922  MadeChange = true;
923  continue;
924  }
925  if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
926  N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) {
927  unsigned NewOpc;
928  switch (N0Opc) {
929  case X86::AND8rm: NewOpc = X86::TEST8mr; break;
930  case X86::AND16rm: NewOpc = X86::TEST16mr; break;
931  case X86::AND32rm: NewOpc = X86::TEST32mr; break;
932  case X86::AND64rm: NewOpc = X86::TEST64mr; break;
933  }
934 
935  // Need to swap the memory and register operand.
936  SDValue Ops[] = { And.getOperand(1),
937  And.getOperand(2),
938  And.getOperand(3),
939  And.getOperand(4),
940  And.getOperand(5),
941  And.getOperand(0),
942  And.getOperand(6) /* Chain */ };
943  MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N),
944  MVT::i32, MVT::Other, Ops);
945  ReplaceUses(N, Test);
946  MadeChange = true;
947  continue;
948  }
949  }
950 
951  // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
952  // used. We're doing this late so we can prefer to fold the AND into masked
953  // comparisons. Doing that can be better for the live range of the mask
954  // register.
955  if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
956  Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
957  N->getOperand(0) == N->getOperand(1) &&
958  N->isOnlyUserOf(N->getOperand(0).getNode()) &&
959  N->getOperand(0).isMachineOpcode() &&
960  onlyUsesZeroFlag(SDValue(N, 0))) {
961  SDValue And = N->getOperand(0);
962  unsigned N0Opc = And.getMachineOpcode();
963  // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
964  // KAND instructions and KTEST use the same ISA feature.
965  if (N0Opc == X86::KANDBrr ||
966  (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
967  N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
968  unsigned NewOpc;
969  switch (Opc) {
970  default: llvm_unreachable("Unexpected opcode!");
971  case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
972  case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
973  case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
974  case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;
975  }
976  MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N),
977  MVT::i32,
978  And.getOperand(0),
979  And.getOperand(1));
980  ReplaceUses(N, KTest);
981  MadeChange = true;
982  continue;
983  }
984  }
985 
986  // Attempt to remove vector moves that were inserted to zero upper bits.
987  if (Opc != TargetOpcode::SUBREG_TO_REG)
988  continue;
989 
990  unsigned SubRegIdx = N->getConstantOperandVal(2);
991  if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
992  continue;
993 
994  SDValue Move = N->getOperand(1);
995  if (!Move.isMachineOpcode())
996  continue;
997 
998  // Make sure it's one of the move opcodes we recognize.
999  switch (Move.getMachineOpcode()) {
1000  default:
1001  continue;
1002  case X86::VMOVAPDrr: case X86::VMOVUPDrr:
1003  case X86::VMOVAPSrr: case X86::VMOVUPSrr:
1004  case X86::VMOVDQArr: case X86::VMOVDQUrr:
1005  case X86::VMOVAPDYrr: case X86::VMOVUPDYrr:
1006  case X86::VMOVAPSYrr: case X86::VMOVUPSYrr:
1007  case X86::VMOVDQAYrr: case X86::VMOVDQUYrr:
1008  case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr:
1009  case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr:
1010  case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr:
1011  case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr:
1012  case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr:
1013  case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr:
1014  case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr:
1015  case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr:
1016  break;
1017  }
1018 
1019  SDValue In = Move.getOperand(0);
1020  if (!In.isMachineOpcode() ||
1021  In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
1022  continue;
1023 
1024  // Make sure the instruction has a VEX, XOP, or EVEX prefix. This covers
1025  // the SHA instructions which use a legacy encoding.
1026  uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
1027  if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
1028  (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
1029  (TSFlags & X86II::EncodingMask) != X86II::XOP)
1030  continue;
1031 
1032  // Producing instruction is another vector instruction. We can drop the
1033  // move.
1034  CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
1035  MadeChange = true;
1036  }
1037 
1038  if (MadeChange)
1039  CurDAG->RemoveDeadNodes();
1040 }
1041 
1042 
1043 /// Emit any code that needs to be executed only in the main function.
1044 void X86DAGToDAGISel::emitSpecialCodeForMain() {
1045  if (Subtarget->isTargetCygMing()) {
1046  TargetLowering::ArgListTy Args;
1047  auto &DL = CurDAG->getDataLayout();
1048 
1049  TargetLowering::CallLoweringInfo CLI(*CurDAG);
1050  CLI.setChain(CurDAG->getRoot())
1051  .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
1052  CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
1053  std::move(Args));
1054  const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
1055  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
1056  CurDAG->setRoot(Result.second);
1057  }
1058 }
1059 
1060 void X86DAGToDAGISel::EmitFunctionEntryCode() {
1061  // If this is main, emit special code for main.
1062  const Function &F = MF->getFunction();
1063  if (F.hasExternalLinkage() && F.getName() == "main")
1064  emitSpecialCodeForMain();
1065 }
1066 
1067 static bool isDispSafeForFrameIndex(int64_t Val) {
1068  // On 64-bit platforms, we can run into an issue where a frame index
1069  // includes a displacement that, when added to the explicit displacement,
1070  // will overflow the displacement field. Assuming that the frame index
1071  // displacement fits into a 31-bit integer (which is only slightly more
1072  // aggressive than the current fundamental assumption that it fits into
1073  // a 32-bit integer), a 31-bit disp should always be safe.
1074  return isInt<31>(Val);
1075 }
1076 
1077 bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
1078  X86ISelAddressMode &AM) {
1079  // If there's no offset to fold, we don't need to do any work.
1080  if (Offset == 0)
1081  return false;
1082 
1083  // Cannot combine ExternalSymbol displacements with integer offsets.
1084  if (AM.ES || AM.MCSym)
1085  return true;
1086 
1087  int64_t Val = AM.Disp + Offset;
1088  CodeModel::Model M = TM.getCodeModel();
1089  if (Subtarget->is64Bit()) {
1090  if (!X86::isOffsetSuitableForCodeModel(Val, M,
1091  AM.hasSymbolicDisplacement()))
1092  return true;
1093  // In addition to the checks required for a register base, check that
1094  // we do not try to use an unsafe Disp with a frame index.
1095  if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
1096  !isDispSafeForFrameIndex(Val))
1097  return true;
1098  }
1099  AM.Disp = Val;
1100  return false;
1101 
1102 }
1103 
1104 bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
1105  SDValue Address = N->getOperand(1);
1106 
1107  // load gs:0 -> GS segment register.
1108  // load fs:0 -> FS segment register.
1109  //
1110  // This optimization is valid because the GNU TLS model defines that
1111  // gs:0 (or fs:0 on X86-64) contains its own address.
1112  // For more information see http://people.redhat.com/drepper/tls.pdf
1113  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
1114  if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
1115  !IndirectTlsSegRefs &&
1116  (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
1117  Subtarget->isTargetFuchsia()))
1118  switch (N->getPointerInfo().getAddrSpace()) {
1119  case 256:
1120  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1121  return false;
1122  case 257:
1123  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1124  return false;
1125  // Address space 258 is not handled here, because it is not used to
1126  // address TLS areas.
1127  }
1128 
1129  return true;
1130 }
1131 
1132 /// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
1133 /// mode. These wrap things that will resolve down into a symbol reference.
1134 /// If no match is possible, this returns true, otherwise it returns false.
1135 bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
1136  // If the addressing mode already has a symbol as the displacement, we can
1137  // never match another symbol.
1138  if (AM.hasSymbolicDisplacement())
1139  return true;
1140 
1141  bool IsRIPRelTLS = false;
1142  bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
1143  if (IsRIPRel) {
1144  SDValue Val = N.getOperand(0);
1145  if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
1146  IsRIPRelTLS = true;
1147  }
1148 
1149  // We can't use an addressing mode in the 64-bit large code model.
1150  // Global TLS addressing is an exception. In the medium code model,
1151 // we can use a mode when RIP wrappers are present.
1152  // That signifies access to globals that are known to be "near",
1153  // such as the GOT itself.
1154  CodeModel::Model M = TM.getCodeModel();
1155  if (Subtarget->is64Bit() &&
1156  ((M == CodeModel::Large && !IsRIPRelTLS) ||
1157  (M == CodeModel::Medium && !IsRIPRel)))
1158  return true;
1159 
1160  // Base and index reg must be 0 in order to use %rip as base.
1161  if (IsRIPRel && AM.hasBaseOrIndexReg())
1162  return true;
1163 
1164  // Make a local copy in case we can't do this fold.
1165  X86ISelAddressMode Backup = AM;
1166 
1167  int64_t Offset = 0;
1168  SDValue N0 = N.getOperand(0);
1169  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
1170  AM.GV = G->getGlobal();
1171  AM.SymbolFlags = G->getTargetFlags();
1172  Offset = G->getOffset();
1173  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
1174  AM.CP = CP->getConstVal();
1175  AM.Align = CP->getAlignment();
1176  AM.SymbolFlags = CP->getTargetFlags();
1177  Offset = CP->getOffset();
1178  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
1179  AM.ES = S->getSymbol();
1180  AM.SymbolFlags = S->getTargetFlags();
1181  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
1182  AM.MCSym = S->getMCSymbol();
1183  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
1184  AM.JT = J->getIndex();
1185  AM.SymbolFlags = J->getTargetFlags();
1186  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
1187  AM.BlockAddr = BA->getBlockAddress();
1188  AM.SymbolFlags = BA->getTargetFlags();
1189  Offset = BA->getOffset();
1190  } else
1191  llvm_unreachable("Unhandled symbol reference node.");
1192 
1193  if (foldOffsetIntoAddress(Offset, AM)) {
1194  AM = Backup;
1195  return true;
1196  }
1197 
1198  if (IsRIPRel)
1199  AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
1200 
1201  // Commit the changes now that we know this fold is safe.
1202  return false;
1203 }
1204 
1205 /// Add the specified node to the specified addressing mode, returning true if
1206 /// it cannot be done. This just pattern matches for the addressing mode.
1207 bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
1208  if (matchAddressRecursively(N, AM, 0))
1209  return true;
1210 
1211  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1212  // a smaller encoding and avoids a scaled-index.
1213  if (AM.Scale == 2 &&
1214  AM.BaseType == X86ISelAddressMode::RegBase &&
1215  AM.Base_Reg.getNode() == nullptr) {
1216  AM.Base_Reg = AM.IndexReg;
1217  AM.Scale = 1;
1218  }
1219 
1220  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
1221  // because it has a smaller encoding.
1222  // TODO: Which other code models can use this?
1223  if (TM.getCodeModel() == CodeModel::Small &&
1224  Subtarget->is64Bit() &&
1225  AM.Scale == 1 &&
1226  AM.BaseType == X86ISelAddressMode::RegBase &&
1227  AM.Base_Reg.getNode() == nullptr &&
1228  AM.IndexReg.getNode() == nullptr &&
1229  AM.SymbolFlags == X86II::MO_NO_FLAG &&
1230  AM.hasSymbolicDisplacement())
1231  AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
1232 
1233  return false;
1234 }
1235 
1236 bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM,
1237  unsigned Depth) {
1238  // Add an artificial use to this node so that we can keep track of
1239  // it if it gets CSE'd with a different node.
1240  HandleSDNode Handle(N);
1241 
1242  X86ISelAddressMode Backup = AM;
1243  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1244  !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
1245  return false;
1246  AM = Backup;
1247 
1248  // Try again after commuting the operands.
1249  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
1250  !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
1251  return false;
1252  AM = Backup;
1253 
1254  // If we couldn't fold both operands into the address at the same time,
1255  // see if we can just put each operand into a register and fold at least
1256  // the add.
1257  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1258  !AM.Base_Reg.getNode() &&
1259  !AM.IndexReg.getNode()) {
1260  N = Handle.getValue();
1261  AM.Base_Reg = N.getOperand(0);
1262  AM.IndexReg = N.getOperand(1);
1263  AM.Scale = 1;
1264  return false;
1265  }
1266  N = Handle.getValue();
1267  return true;
1268 }
1269 
1270 // Insert a node into the DAG at least before the Pos node's position. This
1271 // will reposition the node as needed, and will assign it a node ID that is <=
1272 // the Pos node's ID. Note that this does *not* preserve the uniqueness of node
1273 // IDs! The selection DAG must no longer depend on their uniqueness when this
1274 // is used.
1275 static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
1276  if (N->getNodeId() == -1 ||
1277  (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
1278  SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
1279  DAG.RepositionNode(Pos->getIterator(), N.getNode());
1280  // Mark Node as invalid for pruning as after this it may be a successor to a
1281  // selected node but otherwise be in the same position of Pos.
1282  // Conservatively mark it with the same -abs(Id) to assure node id
1283  // invariant is preserved.
1284  N->setNodeId(Pos->getNodeId());
1285  SelectionDAGISel::InvalidateNodeId(N.getNode());
1286  }
1287 }
1288 
1289 // Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
1290 // safe. This allows us to convert the shift and and into an h-register
1291 // extract and a scaled index. Returns false if the simplification is
1292 // performed.
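//
// For example, with C1 == 2 the pattern is "(X >> 6) & 0x3fc"; rewriting it as
// "((X >> 8) & 0xff) << 2" turns "(X >> 8) & 0xff" into an h-register extract
// and lets the final shift be absorbed into the addressing mode as Scale = 4.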
1293 static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
1294  uint64_t Mask,
1295  SDValue Shift, SDValue X,
1296  X86ISelAddressMode &AM) {
1297  if (Shift.getOpcode() != ISD::SRL ||
1298  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1299  !Shift.hasOneUse())
1300  return true;
1301 
1302  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
1303  if (ScaleLog <= 0 || ScaleLog >= 4 ||
1304  Mask != (0xffu << ScaleLog))
1305  return true;
1306 
1307  MVT VT = N.getSimpleValueType();
1308  SDLoc DL(N);
1309  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
1310  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
1311  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
1312  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
1313  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
1314  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
1315 
1316  // Insert the new nodes into the topological ordering. We must do this in
1317  // a valid topological ordering as nothing is going to go back and re-sort
1318  // these nodes. We continually insert before 'N' in sequence as this is
1319  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1320  // hierarchy left to express.
1321  insertDAGNode(DAG, N, Eight);
1322  insertDAGNode(DAG, N, Srl);
1323  insertDAGNode(DAG, N, NewMask);
1324  insertDAGNode(DAG, N, And);
1325  insertDAGNode(DAG, N, ShlCount);
1326  insertDAGNode(DAG, N, Shl);
1327  DAG.ReplaceAllUsesWith(N, Shl);
1328  AM.IndexReg = And;
1329  AM.Scale = (1 << ScaleLog);
1330  return false;
1331 }
1332 
1333 // Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
1334 // allows us to fold the shift into this addressing mode. Returns false if the
1335 // transform succeeded.
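//
// For example, "(X << 2) & 0x3fc" becomes "(X & 0xff) << 2", so the AND can be
// selected on its own and the left shift folds into the addressing mode as
// Scale = 4.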
1336 static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
1337  uint64_t Mask,
1338  SDValue Shift, SDValue X,
1339  X86ISelAddressMode &AM) {
1340  if (Shift.getOpcode() != ISD::SHL ||
1341  !isa<ConstantSDNode>(Shift.getOperand(1)))
1342  return true;
1343 
1344  // Not likely to be profitable if either the AND or SHIFT node has more
1345  // than one use (unless all uses are for address computation). Besides,
1346  // isel mechanism requires their node ids to be reused.
1347  if (!N.hasOneUse() || !Shift.hasOneUse())
1348  return true;
1349 
1350  // Verify that the shift amount is something we can fold.
1351  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1352  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
1353  return true;
1354 
1355  MVT VT = N.getSimpleValueType();
1356  SDLoc DL(N);
1357  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
1358  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
1359  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
1360 
1361  // Insert the new nodes into the topological ordering. We must do this in
1362  // a valid topological ordering as nothing is going to go back and re-sort
1363  // these nodes. We continually insert before 'N' in sequence as this is
1364  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1365  // hierarchy left to express.
1366  insertDAGNode(DAG, N, NewMask);
1367  insertDAGNode(DAG, N, NewAnd);
1368  insertDAGNode(DAG, N, NewShift);
1369  DAG.ReplaceAllUsesWith(N, NewShift);
1370 
1371  AM.Scale = 1 << ShiftAmt;
1372  AM.IndexReg = NewAnd;
1373  return false;
1374 }
1375 
1376 // Implement some heroics to detect shifts of masked values where the mask can
1377 // be replaced by extending the shift and undoing that in the addressing mode
1378 // scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
1379 // (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
1380 // the addressing mode. This results in code such as:
1381 //
1382 // int f(short *y, int *lookup_table) {
1383 // ...
1384 // return *y + lookup_table[*y >> 11];
1385 // }
1386 //
1387 // Turning into:
1388 // movzwl (%rdi), %eax
1389 // movl %eax, %ecx
1390 // shrl $11, %ecx
1391 // addl (%rsi,%rcx,4), %eax
1392 //
1393 // Instead of:
1394 // movzwl (%rdi), %eax
1395 // movl %eax, %ecx
1396 // shrl $9, %ecx
1397 // andl $124, %rcx
1398 // addl (%rsi,%rcx), %eax
1399 //
1400 // Note that this function assumes the mask is provided as a mask *after* the
1401 // value is shifted. The input chain may or may not match that, but computing
1402 // such a mask is trivial.
1403 static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
1404  uint64_t Mask,
1405  SDValue Shift, SDValue X,
1406  X86ISelAddressMode &AM) {
1407  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
1408  !isa<ConstantSDNode>(Shift.getOperand(1)))
1409  return true;
1410 
1411  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1412  unsigned MaskLZ = countLeadingZeros(Mask);
1413  unsigned MaskTZ = countTrailingZeros(Mask);
1414 
1415  // The amount of shift we're trying to fit into the addressing mode is taken
1416  // from the trailing zeros of the mask.
1417  unsigned AMShiftAmt = MaskTZ;
1418 
1419  // There is nothing we can do here unless the mask is removing some bits.
1420  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1421  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1422 
1423  // We also need to ensure that mask is a continuous run of bits.
1424  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
1425 
1426  // Scale the leading zero count down based on the actual size of the value.
1427  // Also scale it down based on the size of the shift.
1428  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
1429  if (MaskLZ < ScaleDown)
1430  return true;
1431  MaskLZ -= ScaleDown;
1432 
1433  // The final check is to ensure that any masked out high bits of X are
1434  // already known to be zero. Otherwise, the mask has a semantic impact
1435  // other than masking out a couple of low bits. Unfortunately, because of
1436  // the mask, zero extensions will be removed from operands in some cases.
1437  // This code works extra hard to look through extensions because we can
1438  // replace them with zero extensions cheaply if necessary.
1439  bool ReplacingAnyExtend = false;
1440  if (X.getOpcode() == ISD::ANY_EXTEND) {
1441  unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
1442  X.getOperand(0).getSimpleValueType().getSizeInBits();
1443  // Assume that we'll replace the any-extend with a zero-extend, and
1444  // narrow the search to the extended value.
1445  X = X.getOperand(0);
1446  MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
1447  ReplacingAnyExtend = true;
1448  }
1449  APInt MaskedHighBits =
1450  APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
1451  KnownBits Known = DAG.computeKnownBits(X);
1452  if (MaskedHighBits != Known.Zero) return true;
1453 
1454  // We've identified a pattern that can be transformed into a single shift
1455  // and an addressing mode. Make it so.
1456  MVT VT = N.getSimpleValueType();
1457  if (ReplacingAnyExtend) {
1458  assert(X.getValueType() != VT);
1459  // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
1460  SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
1461  insertDAGNode(DAG, N, NewX);
1462  X = NewX;
1463  }
1464  SDLoc DL(N);
1465  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1466  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1467  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1468  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
1469 
1470  // Insert the new nodes into the topological ordering. We must do this in
1471  // a valid topological ordering as nothing is going to go back and re-sort
1472  // these nodes. We continually insert before 'N' in sequence as this is
1473  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1474  // hierarchy left to express.
1475  insertDAGNode(DAG, N, NewSRLAmt);
1476  insertDAGNode(DAG, N, NewSRL);
1477  insertDAGNode(DAG, N, NewSHLAmt);
1478  insertDAGNode(DAG, N, NewSHL);
1479  DAG.ReplaceAllUsesWith(N, NewSHL);
1480 
1481  AM.Scale = 1 << AMShiftAmt;
1482  AM.IndexReg = NewSRL;
1483  return false;
1484 }
1485 
1486 // Transform "(X >> SHIFT) & (MASK << C1)" to
1487 // "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
1488 // matched to a BEXTR later. Returns false if the simplification is performed.
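//
// For example, "(X >> 4) & (0xff << 2)" becomes "((X >> 6) & 0xff) << 2"; the
// shift-and-mask can then be matched as a BEXTR and the final shift folds into
// the addressing mode as Scale = 4.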
1489 static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
1490  uint64_t Mask,
1491  SDValue Shift, SDValue X,
1492  X86ISelAddressMode &AM,
1493  const X86Subtarget &Subtarget) {
1494  if (Shift.getOpcode() != ISD::SRL ||
1495  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1496  !Shift.hasOneUse() || !N.hasOneUse())
1497  return true;
1498 
1499  // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
1500  if (!Subtarget.hasTBM() &&
1501  !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
1502  return true;
1503 
1504  // We need to ensure that mask is a continuous run of bits.
1505  if (!isShiftedMask_64(Mask)) return true;
1506 
1507  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1508 
1509  // The amount of shift we're trying to fit into the addressing mode is taken
1510  // from the trailing zeros of the mask.
1511  unsigned AMShiftAmt = countTrailingZeros(Mask);
1512 
1513  // There is nothing we can do here unless the mask is removing some bits.
1514  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1515  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1516 
1517  MVT VT = N.getSimpleValueType();
1518  SDLoc DL(N);
1519  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1520  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1521  SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT);
1522  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask);
1523  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1524  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt);
1525 
1526  // Insert the new nodes into the topological ordering. We must do this in
1527  // a valid topological ordering as nothing is going to go back and re-sort
1528  // these nodes. We continually insert before 'N' in sequence as this is
1529  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1530  // hierarchy left to express.
1531  insertDAGNode(DAG, N, NewSRLAmt);
1532  insertDAGNode(DAG, N, NewSRL);
1533  insertDAGNode(DAG, N, NewMask);
1534  insertDAGNode(DAG, N, NewAnd);
1535  insertDAGNode(DAG, N, NewSHLAmt);
1536  insertDAGNode(DAG, N, NewSHL);
1537  DAG.ReplaceAllUsesWith(N, NewSHL);
1538 
1539  AM.Scale = 1 << AMShiftAmt;
1540  AM.IndexReg = NewAnd;
1541  return false;
1542 }
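As a sanity check on the transform documented above, the two forms can be compared directly: everything before the final shift-by-C1 in the rewritten form is a contiguous-bit extract (the BEXTR part), while the shift-by-C1 itself becomes the addressing-mode scale. Illustrative values only:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t X = 0xDEADBEEFCAFEF00DULL;
  const unsigned Shift = 5, C1 = 3;  // C1 must be 1, 2 or 3 to fit the scale field
  const uint64_t Mask = 0xFF;        // contiguous mask; the operand form is Mask << C1

  uint64_t Before = (X >> Shift) & (Mask << C1);
  uint64_t After  = ((X >> (Shift + C1)) & Mask) << C1;
  assert(Before == After);
  return 0;
}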
1543 
1544 bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
1545  unsigned Depth) {
1546  SDLoc dl(N);
1547  LLVM_DEBUG({
1548  dbgs() << "MatchAddress: ";
1549  AM.dump(CurDAG);
1550  });
1551  // Limit recursion.
1552  if (Depth > 5)
1553  return matchAddressBase(N, AM);
1554 
1555  // If this is already a %rip relative address, we can only merge immediates
1556  // into it. Instead of handling this in every case, we handle it here.
1557  // RIP relative addressing: %rip + 32-bit displacement!
1558  if (AM.isRIPRelative()) {
1559  // FIXME: JumpTable and ExternalSymbol address currently don't like
1560  // displacements. It isn't very important, but this should be fixed for
1561  // consistency.
1562  if (!(AM.ES || AM.MCSym) && AM.JT != -1)
1563  return true;
1564 
1565  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
1566  if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
1567  return false;
1568  return true;
1569  }
1570 
1571  switch (N.getOpcode()) {
1572  default: break;
1573  case ISD::LOCAL_RECOVER: {
1574  if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
1575  if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
1576  // Use the symbol and don't prefix it.
1577  AM.MCSym = ESNode->getMCSymbol();
1578  return false;
1579  }
1580  break;
1581  }
1582  case ISD::Constant: {
1583  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1584  if (!foldOffsetIntoAddress(Val, AM))
1585  return false;
1586  break;
1587  }
1588 
1589  case X86ISD::Wrapper:
1590  case X86ISD::WrapperRIP:
1591  if (!matchWrapper(N, AM))
1592  return false;
1593  break;
1594 
1595  case ISD::LOAD:
1596  if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
1597  return false;
1598  break;
1599 
1600  case ISD::FrameIndex:
1601  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1602  AM.Base_Reg.getNode() == nullptr &&
1603  (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
1604  AM.BaseType = X86ISelAddressMode::FrameIndexBase;
1605  AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
1606  return false;
1607  }
1608  break;
1609 
1610  case ISD::SHL:
1611  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
1612  break;
1613 
1614  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1615  unsigned Val = CN->getZExtValue();
1616  // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
1617  // that the base operand remains free for further matching. If
1618  // the base doesn't end up getting used, a post-processing step
1619  // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
1620  if (Val == 1 || Val == 2 || Val == 3) {
1621  AM.Scale = 1 << Val;
1622  SDValue ShVal = N.getOperand(0);
1623 
1624  // Okay, we know that we have a scale by now. However, if the scaled
1625  // value is an add of something and a constant, we can fold the
1626  // constant into the disp field here.
1627  if (CurDAG->isBaseWithConstantOffset(ShVal)) {
1628  AM.IndexReg = ShVal.getOperand(0);
1629  ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
1630  uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
1631  if (!foldOffsetIntoAddress(Disp, AM))
1632  return false;
1633  }
1634 
1635  AM.IndexReg = ShVal;
1636  return false;
1637  }
1638  }
1639  break;
1640 
1641  case ISD::SRL: {
1642  // Scale must not be used already.
1643  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1644 
1645  SDValue And = N.getOperand(0);
1646  if (And.getOpcode() != ISD::AND) break;
1647  SDValue X = And.getOperand(0);
1648 
1649  // We only handle up to 64-bit values here as those are what matter for
1650  // addressing mode optimizations.
1651  if (X.getSimpleValueType().getSizeInBits() > 64) break;
1652 
1653  // The mask used for the transform is expected to be post-shift, but we
1654  // found the shift first so just apply the shift to the mask before passing
1655  // it down.
1656  if (!isa<ConstantSDNode>(N.getOperand(1)) ||
1657  !isa<ConstantSDNode>(And.getOperand(1)))
1658  break;
1659  uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
1660 
1661  // Try to fold the mask and shift into the scale, and return false if we
1662  // succeed.
1663  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
1664  return false;
1665  break;
1666  }
1667 
1668  case ISD::SMUL_LOHI:
1669  case ISD::UMUL_LOHI:
1670  // A mul_lohi where we need the low part can be folded as a plain multiply.
 1671  if (N.getResNo() != 0) break;
 1672  LLVM_FALLTHROUGH;
1673  case ISD::MUL:
1674  case X86ISD::MUL_IMM:
1675  // X*[3,5,9] -> X+X*[2,4,8]
1676  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1677  AM.Base_Reg.getNode() == nullptr &&
1678  AM.IndexReg.getNode() == nullptr) {
1679  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1680  if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
1681  CN->getZExtValue() == 9) {
1682  AM.Scale = unsigned(CN->getZExtValue())-1;
1683 
1684  SDValue MulVal = N.getOperand(0);
1685  SDValue Reg;
1686 
1687  // Okay, we know that we have a scale by now. However, if the scaled
1688  // value is an add of something and a constant, we can fold the
1689  // constant into the disp field here.
1690  if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
1691  isa<ConstantSDNode>(MulVal.getOperand(1))) {
1692  Reg = MulVal.getOperand(0);
1693  ConstantSDNode *AddVal =
1694  cast<ConstantSDNode>(MulVal.getOperand(1));
1695  uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
1696  if (foldOffsetIntoAddress(Disp, AM))
1697  Reg = N.getOperand(0);
1698  } else {
1699  Reg = N.getOperand(0);
1700  }
1701 
1702  AM.IndexReg = AM.Base_Reg = Reg;
1703  return false;
1704  }
1705  }
1706  break;
1707 
1708  case ISD::SUB: {
 1709  // Given A-B, if A can be completely folded into the address,
 1710  // leaving the index field unused, use -B as the index.
 1711  // This is a win if A has multiple parts that can be folded into
 1712  // the address. Also, this saves a mov if the base register has
 1713  // other uses, since it avoids a two-address sub instruction; however,
 1714  // it costs an additional mov if the index register has other uses.
1715 
1716  // Add an artificial use to this node so that we can keep track of
1717  // it if it gets CSE'd with a different node.
1718  HandleSDNode Handle(N);
1719 
1720  // Test if the LHS of the sub can be folded.
1721  X86ISelAddressMode Backup = AM;
1722  if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
1723  AM = Backup;
1724  break;
1725  }
1726  // Test if the index field is free for use.
1727  if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
1728  AM = Backup;
1729  break;
1730  }
1731 
1732  int Cost = 0;
1733  SDValue RHS = Handle.getValue().getOperand(1);
1734  // If the RHS involves a register with multiple uses, this
1735  // transformation incurs an extra mov, due to the neg instruction
1736  // clobbering its operand.
1737  if (!RHS.getNode()->hasOneUse() ||
1738  RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
1739  RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
1740  RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
1741  (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
1742  RHS.getOperand(0).getValueType() == MVT::i32))
1743  ++Cost;
1744  // If the base is a register with multiple uses, this
1745  // transformation may save a mov.
1746  // FIXME: Don't rely on DELETED_NODEs.
1747  if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
1748  AM.Base_Reg->getOpcode() != ISD::DELETED_NODE &&
1749  !AM.Base_Reg.getNode()->hasOneUse()) ||
1750  AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1751  --Cost;
1752  // If the folded LHS was interesting, this transformation saves
1753  // address arithmetic.
1754  if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
1755  ((AM.Disp != 0) && (Backup.Disp == 0)) +
1756  (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
1757  --Cost;
1758  // If it doesn't look like it may be an overall win, don't do it.
1759  if (Cost >= 0) {
1760  AM = Backup;
1761  break;
1762  }
1763 
1764  // Ok, the transformation is legal and appears profitable. Go for it.
1765  SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType());
1766  SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
1767  AM.IndexReg = Neg;
1768  AM.Scale = 1;
1769 
1770  // Insert the new nodes into the topological ordering.
1771  insertDAGNode(*CurDAG, Handle.getValue(), Zero);
1772  insertDAGNode(*CurDAG, Handle.getValue(), Neg);
1773  return false;
1774  }
1775 
1776  case ISD::ADD:
1777  if (!matchAdd(N, AM, Depth))
1778  return false;
1779  break;
1780 
1781  case ISD::OR:
1782  // We want to look through a transform in InstCombine and DAGCombiner that
1783  // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
1784  // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
1785  // An 'lea' can then be used to match the shift (multiply) and add:
1786  // and $1, %esi
1787  // lea (%rsi, %rdi, 8), %rax
1788  if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
1789  !matchAdd(N, AM, Depth))
1790  return false;
1791  break;
1792 
1793  case ISD::AND: {
1794  // Perform some heroic transforms on an and of a constant-count shift
1795  // with a constant to enable use of the scaled offset field.
1796 
1797  // Scale must not be used already.
1798  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1799 
1800  SDValue Shift = N.getOperand(0);
1801  if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
1802  SDValue X = Shift.getOperand(0);
1803 
1804  // We only handle up to 64-bit values here as those are what matter for
1805  // addressing mode optimizations.
1806  if (X.getSimpleValueType().getSizeInBits() > 64) break;
1807 
1808  if (!isa<ConstantSDNode>(N.getOperand(1)))
1809  break;
1810  uint64_t Mask = N.getConstantOperandVal(1);
1811 
1812  // Try to fold the mask and shift into an extract and scale.
1813  if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
1814  return false;
1815 
1816  // Try to fold the mask and shift directly into the scale.
1817  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
1818  return false;
1819 
1820  // Try to swap the mask and shift to place shifts which can be done as
1821  // a scale on the outside of the mask.
1822  if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
1823  return false;
1824 
1825  // Try to fold the mask and shift into BEXTR and scale.
1826  if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
1827  return false;
1828 
1829  break;
1830  }
1831  }
1832 
1833  return matchAddressBase(N, AM);
1834 }
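Two of the rewrites in matchAddressRecursively are easy to verify numerically: a multiply by 3, 5 or 9 decomposes into base + index*scale (one LEA), and an 'or' of values with no common set bits behaves exactly like an 'add', which is why it can be routed through matchAdd. A small sketch with arbitrary example values:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t x = 0x1234;

  // X*9 -> X + X*8, i.e. lea (%rdi,%rdi,8), %rax; AM.Scale is set to 9 - 1.
  assert(x * 9 == x + x * 8);

  // (or a, b) == (add a, b) whenever a and b share no set bits, which is
  // exactly what CurDAG->haveNoCommonBitsSet checks before reusing matchAdd.
  uint64_t a = x & 0x7;             // low three bits only
  uint64_t b = (x << 3) & ~0x7ULL;  // disjoint from a by construction
  assert((a & b) == 0);
  assert((a | b) == a + b);
  return 0;
}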
1835 
1836 /// Helper for MatchAddress. Add the specified node to the
1837 /// specified addressing mode without any further recursion.
1838 bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
1839  // Is the base register already occupied?
1840  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
1841  // If so, check to see if the scale index register is set.
1842  if (!AM.IndexReg.getNode()) {
1843  AM.IndexReg = N;
1844  AM.Scale = 1;
1845  return false;
1846  }
1847 
1848  // Otherwise, we cannot select it.
1849  return true;
1850  }
1851 
1852  // Default, generate it as a register.
1853  AM.BaseType = X86ISelAddressMode::RegBase;
1854  AM.Base_Reg = N;
1855  return false;
1856 }
1857 
1858 /// Helper for selectVectorAddr. Handles things that can be folded into a
1859 /// gather scatter address. The index register and scale should have already
1860 /// been handled.
1861 bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
1862  // TODO: Support other operations.
1863  switch (N.getOpcode()) {
1864  case ISD::Constant: {
1865  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1866  if (!foldOffsetIntoAddress(Val, AM))
1867  return false;
1868  break;
1869  }
1870  case X86ISD::Wrapper:
1871  if (!matchWrapper(N, AM))
1872  return false;
1873  break;
1874  }
1875 
1876  return matchAddressBase(N, AM);
1877 }
1878 
1879 bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
1880  SDValue &Scale, SDValue &Index,
1881  SDValue &Disp, SDValue &Segment) {
1882  X86ISelAddressMode AM;
1883  auto *Mgs = cast<X86MaskedGatherScatterSDNode>(Parent);
1884  AM.IndexReg = Mgs->getIndex();
1885  AM.Scale = cast<ConstantSDNode>(Mgs->getScale())->getZExtValue();
1886 
1887  unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
1888  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
1889  if (AddrSpace == 256)
1890  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1891  if (AddrSpace == 257)
1892  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1893  if (AddrSpace == 258)
1894  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
1895 
1896  // Try to match into the base and displacement fields.
1897  if (matchVectorAddress(N, AM))
1898  return false;
1899 
1900  MVT VT = N.getSimpleValueType();
1901  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1902  if (!AM.Base_Reg.getNode())
1903  AM.Base_Reg = CurDAG->getRegister(0, VT);
1904  }
1905 
1906  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1907  return true;
1908 }
1909 
1910 /// Returns true if it is able to pattern match an addressing mode.
1911 /// It returns the operands which make up the maximal addressing mode it can
1912 /// match by reference.
1913 ///
1914 /// Parent is the parent node of the addr operand that is being matched. It
1915 /// is always a load, store, atomic node, or null. It is only null when
1916 /// checking memory operands for inline asm nodes.
1917 bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
1918  SDValue &Scale, SDValue &Index,
1919  SDValue &Disp, SDValue &Segment) {
1920  X86ISelAddressMode AM;
1921 
1922  if (Parent &&
1923  // This list of opcodes are all the nodes that have an "addr:$ptr" operand
1924  // that are not a MemSDNode, and thus don't have proper addrspace info.
1925  Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
1926  Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
1927  Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
1928  Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
1929  Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
1930  unsigned AddrSpace =
1931  cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
1932  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
1933  if (AddrSpace == 256)
1934  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1935  if (AddrSpace == 257)
1936  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1937  if (AddrSpace == 258)
1938  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
1939  }
1940 
1941  if (matchAddress(N, AM))
1942  return false;
1943 
1944  MVT VT = N.getSimpleValueType();
1945  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1946  if (!AM.Base_Reg.getNode())
1947  AM.Base_Reg = CurDAG->getRegister(0, VT);
1948  }
1949 
1950  if (!AM.IndexReg.getNode())
1951  AM.IndexReg = CurDAG->getRegister(0, VT);
1952 
1953  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1954  return true;
1955 }
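The address-space numbers 256, 257 and 258 checked above are the LLVM address spaces that x86 front ends use for the %gs, %fs and %ss segments. A rough illustration, assuming clang's address_space attribute (the function name is made up; the point is only that such a pointer reaches the selector in address space 256 and ends up with AM.Segment = %gs):

// Illustrative only; expected to lower to a %gs:-prefixed load on x86-64.
int read_gs_slot(__attribute__((address_space(256))) int *p) {
  return *p;
}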
1956 
1957 // We can only fold a load if all nodes between it and the root node have a
1958 // single use. If there are additional uses, we could end up duplicating the
1959 // load.
1960 static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User) {
1961  while (User != Root) {
1962  if (!User->hasOneUse())
1963  return false;
1964  User = *User->use_begin();
1965  }
1966 
1967  return true;
1968 }
1969 
1970 /// Match a scalar SSE load. In particular, we want to match a load whose top
1971 /// elements are either undef or zeros. The load flavor is derived from the
1972 /// type of N, which is either v4f32 or v2f64.
1973 ///
1974 /// We also return:
1975 /// PatternChainNode: this is the matched node that has a chain input and
1976 /// output.
1977 bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
1978  SDValue N, SDValue &Base,
1979  SDValue &Scale, SDValue &Index,
1980  SDValue &Disp, SDValue &Segment,
1981  SDValue &PatternNodeWithChain) {
1982  if (!hasSingleUsesFromRoot(Root, Parent))
1983  return false;
1984 
1985  // We can allow a full vector load here since narrowing a load is ok.
1986  if (ISD::isNON_EXTLoad(N.getNode())) {
1987  PatternNodeWithChain = N;
1988  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1989  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
1990  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1991  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1992  Segment);
1993  }
1994  }
1995 
1996  // We can also match the special zero extended load opcode.
1997  if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
1998  PatternNodeWithChain = N;
1999  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2000  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
2001  auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
2002  return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
2003  Segment);
2004  }
2005  }
2006 
2007  // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
2008  // once. Otherwise the load might get duplicated and the chain output of the
2009  // duplicate load will not be observed by all dependencies.
2010  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
2011  PatternNodeWithChain = N.getOperand(0);
2012  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
2013  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2014  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
2015  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
2016  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2017  Segment);
2018  }
2019  }
2020 
2021  // Also handle the case where we explicitly require zeros in the top
2022  // elements. This is a vector shuffle from the zero vector.
2023  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
2024  // Check to see if the top elements are all zeros (or bitcast of zeros).
 2025  N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
 2026  N.getOperand(0).getNode()->hasOneUse()) {
2027  PatternNodeWithChain = N.getOperand(0).getOperand(0);
2028  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
2029  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2030  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
2031  // Okay, this is a zero extending load. Fold it.
2032  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
2033  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2034  Segment);
2035  }
2036  }
2037 
2038  return false;
2039 }
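The goal of selectScalarSSELoad is to let a scalar load whose upper elements are undef or zero be folded straight into the memory operand of a scalar SSE instruction. At the source level, the shape it is looking for corresponds roughly to the following (illustrative intrinsics; the selector of course works on the DAG, not on C++):

#include <xmmintrin.h>

// With optimization the load is expected to fold into the arithmetic op,
// e.g. addss (%rdi), %xmm0, instead of a separate movss followed by addss.
__m128 add_from_memory(__m128 acc, const float *p) {
  return _mm_add_ss(acc, _mm_load_ss(p));  // _mm_load_ss zeroes the upper lanes
}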
2040 
2041 
2042 bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
2043  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2044  uint64_t ImmVal = CN->getZExtValue();
2045  if (!isUInt<32>(ImmVal))
2046  return false;
2047 
2048  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
2049  return true;
2050  }
2051 
2052  // In static codegen with small code model, we can get the address of a label
2053  // into a register with 'movl'
2054  if (N->getOpcode() != X86ISD::Wrapper)
2055  return false;
2056 
2057  N = N.getOperand(0);
2058 
2059  // At least GNU as does not accept 'movl' for TPOFF relocations.
2060  // FIXME: We could use 'movl' when we know we are targeting MC.
 2061  if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
 2062    return false;
2063 
2064  Imm = N;
2065  if (N->getOpcode() != ISD::TargetGlobalAddress)
2066  return TM.getCodeModel() == CodeModel::Small;
2067 
 2068  Optional<ConstantRange> CR =
 2069      cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
2070  if (!CR)
2071  return TM.getCodeModel() == CodeModel::Small;
2072 
2073  return CR->getUnsignedMax().ult(1ull << 32);
2074 }
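The reason an immediate that passes isUInt<32> can be selected this way is that a write to a 32-bit register implicitly zeroes the upper 32 bits on x86-64, so a plain movl reproduces the full 64-bit constant. A trivial check of that premise:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t ImmVal = 0x89ABCDEFULL;               // isUInt<32>(ImmVal) holds
  assert(ImmVal == (uint64_t)(uint32_t)ImmVal);  // so movl $0x89ABCDEF, %eax is enough
  return 0;
}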
2075 
2076 bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
2077  SDValue &Scale, SDValue &Index,
2078  SDValue &Disp, SDValue &Segment) {
2079  // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
2080  SDLoc DL(N);
2081 
2082  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
2083  return false;
2084 
 2085  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
 2086  if (RN && RN->getReg() == 0)
2087  Base = CurDAG->getRegister(0, MVT::i64);
2088  else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(Base)) {
2089  // Base could already be %rip, particularly in the x32 ABI.
2090  Base = SDValue(CurDAG->getMachineNode(
2091  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
2092  CurDAG->getTargetConstant(0, DL, MVT::i64),
2093  Base,
2094  CurDAG->getTargetConstant(X86::sub_32bit, DL, MVT::i32)),
2095  0);
2096  }
2097 
2098  RN = dyn_cast<RegisterSDNode>(Index);
2099  if (RN && RN->getReg() == 0)
2100  Index = CurDAG->getRegister(0, MVT::i64);
2101  else {
2102  assert(Index.getValueType() == MVT::i32 &&
2103  "Expect to be extending 32-bit registers for use in LEA");
2104  Index = SDValue(CurDAG->getMachineNode(
2105  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
2106  CurDAG->getTargetConstant(0, DL, MVT::i64),
2107  Index,
2108  CurDAG->getTargetConstant(X86::sub_32bit, DL,
2109  MVT::i32)),
2110  0);
2111  }
2112 
2113  return true;
2114 }
2115 
2116 /// Calls SelectAddr and determines if the maximal addressing
2117 /// mode it matches can be cost effectively emitted as an LEA instruction.
2118 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
2119  SDValue &Base, SDValue &Scale,
2120  SDValue &Index, SDValue &Disp,
2121  SDValue &Segment) {
2122  X86ISelAddressMode AM;
2123 
2124  // Save the DL and VT before calling matchAddress, it can invalidate N.
2125  SDLoc DL(N);
2126  MVT VT = N.getSimpleValueType();
2127 
2128  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
2129  // segments.
2130  SDValue Copy = AM.Segment;
2131  SDValue T = CurDAG->getRegister(0, MVT::i32);
2132  AM.Segment = T;
2133  if (matchAddress(N, AM))
2134  return false;
2135  assert (T == AM.Segment);
2136  AM.Segment = Copy;
2137 
2138  unsigned Complexity = 0;
2139  if (AM.BaseType == X86ISelAddressMode::RegBase)
2140  if (AM.Base_Reg.getNode())
2141  Complexity = 1;
2142  else
2143  AM.Base_Reg = CurDAG->getRegister(0, VT);
2144  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2145  Complexity = 4;
2146 
2147  if (AM.IndexReg.getNode())
2148  Complexity++;
2149  else
2150  AM.IndexReg = CurDAG->getRegister(0, VT);
2151 
2152  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
2153  // a simple shift.
2154  if (AM.Scale > 1)
2155  Complexity++;
2156 
2157  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
2158  // to a LEA. This is determined with some experimentation but is by no means
2159  // optimal (especially for code size consideration). LEA is nice because of
2160  // its three-address nature. Tweak the cost function again when we can run
2161  // convertToThreeAddress() at register allocation time.
2162  if (AM.hasSymbolicDisplacement()) {
2163  // For X86-64, always use LEA to materialize RIP-relative addresses.
2164  if (Subtarget->is64Bit())
2165  Complexity = 4;
2166  else
2167  Complexity += 2;
2168  }
2169 
2170  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
2171  Complexity++;
2172 
2173  // If it isn't worth using an LEA, reject it.
2174  if (Complexity <= 2)
2175  return false;
2176 
2177  getAddressOperands(AM, DL, Base, Scale, Index, Disp, Segment);
2178  return true;
2179 }
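The profitability test above boils down to a small integer score; LEA is only used when the score exceeds 2. A standalone restatement of that scoring (field and function names are invented for illustration; the real code works on X86ISelAddressMode and carries the extra RIP/frame-index details shown above):

#include <cassert>

struct ToyAM {
  bool HasBaseReg, IsFrameIndex, HasIndexReg, HasSymbol, Is64Bit;
  unsigned Scale;
  int Disp;
};

// Mirrors the Complexity computation in selectLEAAddr.
unsigned leaComplexity(const ToyAM &AM) {
  unsigned C = 0;
  if (AM.IsFrameIndex)    C = 4;
  else if (AM.HasBaseReg) C = 1;
  if (AM.HasIndexReg)     ++C;
  if (AM.Scale > 1)       ++C;  // a lone leal (,%reg,2) is cheaper as add/shift
  if (AM.HasSymbol)       C = AM.Is64Bit ? 4 : C + 2;
  if (AM.Disp != 0 && (AM.HasBaseReg || AM.HasIndexReg)) ++C;
  return C;
}

int main() {
  // base + index*4 + disp, e.g. lea 8(%rdi,%rsi,4), %rax: worth an LEA.
  assert(leaComplexity({true, false, true, false, true, 4, 8}) > 2);
  // A lone scaled index, e.g. (,%rdi,2): rejected, add/shift is cheaper.
  assert(leaComplexity({false, false, true, false, true, 2, 0}) <= 2);
  return 0;
}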
2180 
2181 /// This is only run on TargetGlobalTLSAddress nodes.
2182 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
2183  SDValue &Scale, SDValue &Index,
2184  SDValue &Disp, SDValue &Segment) {
 2185  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
 2186  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
2187 
2188  X86ISelAddressMode AM;
2189  AM.GV = GA->getGlobal();
2190  AM.Disp += GA->getOffset();
2191  AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
2192  AM.SymbolFlags = GA->getTargetFlags();
2193 
2194  if (N.getValueType() == MVT::i32) {
2195  AM.Scale = 1;
2196  AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
2197  } else {
2198  AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
2199  }
2200 
2201  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
2202  return true;
2203 }
2204 
2205 bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
2206  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2207  Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
2208  N.getValueType());
2209  return true;
2210  }
2211 
2212  // Keep track of the original value type and whether this value was
2213  // truncated. If we see a truncation from pointer type to VT that truncates
2214  // bits that are known to be zero, we can use a narrow reference.
2215  EVT VT = N.getValueType();
2216  bool WasTruncated = false;
2217  if (N.getOpcode() == ISD::TRUNCATE) {
2218  WasTruncated = true;
2219  N = N.getOperand(0);
2220  }
2221 
2222  if (N.getOpcode() != X86ISD::Wrapper)
2223  return false;
2224 
2225  // We can only use non-GlobalValues as immediates if they were not truncated,
2226  // as we do not have any range information. If we have a GlobalValue and the
2227  // address was not truncated, we can select it as an operand directly.
2228  unsigned Opc = N.getOperand(0)->getOpcode();
2229  if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
2230  Op = N.getOperand(0);
2231  // We can only select the operand directly if we didn't have to look past a
2232  // truncate.
2233  return !WasTruncated;
2234  }
2235 
2236  // Check that the global's range fits into VT.
2237  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
2238  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2239  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
2240  return false;
2241 
2242  // Okay, we can use a narrow reference.
2243  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
2244  GA->getOffset(), GA->getTargetFlags());
2245  return true;
2246 }
2247 
2248 bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
2249  SDValue &Base, SDValue &Scale,
2250  SDValue &Index, SDValue &Disp,
2251  SDValue &Segment) {
2252  if (!ISD::isNON_EXTLoad(N.getNode()) ||
2253  !IsProfitableToFold(N, P, Root) ||
2254  !IsLegalToFold(N, P, Root, OptLevel))
2255  return false;
2256 
2257  return selectAddr(N.getNode(),
2258  N.getOperand(1), Base, Scale, Index, Disp, Segment);
2259 }
2260 
2261 /// Return an SDNode that returns the value of the global base register.
2262 /// Output instructions required to initialize the global base register,
2263 /// if necessary.
2264 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
2265  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
2266  auto &DL = MF->getDataLayout();
2267  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
2268 }
2269 
2270 bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
2271  if (N->getOpcode() == ISD::TRUNCATE)
2272  N = N->getOperand(0).getNode();
2273  if (N->getOpcode() != X86ISD::Wrapper)
2274  return false;
2275 
2276  auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
2277  if (!GA)
2278  return false;
2279 
2280  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2281  return CR && CR->getSignedMin().sge(-1ull << Width) &&
2282  CR->getSignedMax().slt(1ull << Width);
2283 }
2284 
2285 static X86::CondCode getCondFromOpc(unsigned Opc) {
 2286  X86::CondCode CC = X86::COND_INVALID;
 2287  if (CC == X86::COND_INVALID)
2288  CC = X86::getCondFromBranchOpc(Opc);
2289  if (CC == X86::COND_INVALID)
2290  CC = X86::getCondFromSETOpc(Opc);
2291  if (CC == X86::COND_INVALID)
2292  CC = X86::getCondFromCMovOpc(Opc);
2293 
2294  return CC;
2295 }
2296 
2297 /// Test whether the given X86ISD::CMP node has any users that use a flag
2298 /// other than ZF.
2299 bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
2300  // Examine each user of the node.
2301  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2302  UI != UE; ++UI) {
2303  // Only check things that use the flags.
2304  if (UI.getUse().getResNo() != Flags.getResNo())
2305  continue;
2306  // Only examine CopyToReg uses that copy to EFLAGS.
2307  if (UI->getOpcode() != ISD::CopyToReg ||
2308  cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2309  return false;
2310  // Examine each user of the CopyToReg use.
2311  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2312  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2313  // Only examine the Flag result.
2314  if (FlagUI.getUse().getResNo() != 1) continue;
2315  // Anything unusual: assume conservatively.
2316  if (!FlagUI->isMachineOpcode()) return false;
2317  // Examine the condition code of the user.
2318  X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
2319 
2320  switch (CC) {
2321  // Comparisons which only use the zero flag.
2322  case X86::COND_E: case X86::COND_NE:
2323  continue;
2324  // Anything else: assume conservatively.
2325  default:
2326  return false;
2327  }
2328  }
2329  }
2330  return true;
2331 }
2332 
2333 /// Test whether the given X86ISD::CMP node has any uses which require the SF
2334 /// flag to be accurate.
2335 bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
2336  // Examine each user of the node.
2337  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2338  UI != UE; ++UI) {
2339  // Only check things that use the flags.
2340  if (UI.getUse().getResNo() != Flags.getResNo())
2341  continue;
2342  // Only examine CopyToReg uses that copy to EFLAGS.
2343  if (UI->getOpcode() != ISD::CopyToReg ||
2344  cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2345  return false;
2346  // Examine each user of the CopyToReg use.
2347  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2348  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2349  // Only examine the Flag result.
2350  if (FlagUI.getUse().getResNo() != 1) continue;
2351  // Anything unusual: assume conservatively.
2352  if (!FlagUI->isMachineOpcode()) return false;
2353  // Examine the condition code of the user.
2354  X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
2355 
2356  switch (CC) {
2357  // Comparisons which don't examine the SF flag.
2358  case X86::COND_A: case X86::COND_AE:
2359  case X86::COND_B: case X86::COND_BE:
2360  case X86::COND_E: case X86::COND_NE:
2361  case X86::COND_O: case X86::COND_NO:
2362  case X86::COND_P: case X86::COND_NP:
2363  continue;
2364  // Anything else: assume conservatively.
2365  default:
2366  return false;
2367  }
2368  }
2369  }
2370  return true;
2371 }
2372 
 2373 static bool mayUseCarryFlag(X86::CondCode CC) {
 2374  switch (CC) {
2375  // Comparisons which don't examine the CF flag.
2376  case X86::COND_O: case X86::COND_NO:
2377  case X86::COND_E: case X86::COND_NE:
2378  case X86::COND_S: case X86::COND_NS:
2379  case X86::COND_P: case X86::COND_NP:
2380  case X86::COND_L: case X86::COND_GE:
2381  case X86::COND_G: case X86::COND_LE:
2382  return false;
2383  // Anything else: assume conservatively.
2384  default:
2385  return true;
2386  }
2387 }
2388 
2389 /// Test whether the given node which sets flags has any uses which require the
2390 /// CF flag to be accurate.
2391  bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
2392  // Examine each user of the node.
2393  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2394  UI != UE; ++UI) {
2395  // Only check things that use the flags.
2396  if (UI.getUse().getResNo() != Flags.getResNo())
2397  continue;
2398 
2399  unsigned UIOpc = UI->getOpcode();
2400 
2401  if (UIOpc == ISD::CopyToReg) {
2402  // Only examine CopyToReg uses that copy to EFLAGS.
2403  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2404  return false;
2405  // Examine each user of the CopyToReg use.
2406  for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2407  FlagUI != FlagUE; ++FlagUI) {
2408  // Only examine the Flag result.
2409  if (FlagUI.getUse().getResNo() != 1)
2410  continue;
2411  // Anything unusual: assume conservatively.
2412  if (!FlagUI->isMachineOpcode())
2413  return false;
2414  // Examine the condition code of the user.
2415  X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
2416 
2417  if (mayUseCarryFlag(CC))
2418  return false;
2419  }
2420 
2421  // This CopyToReg is ok. Move on to the next user.
2422  continue;
2423  }
2424 
2425  // This might be an unselected node. So look for the pre-isel opcodes that
2426  // use flags.
2427  unsigned CCOpNo;
2428  switch (UIOpc) {
2429  default:
2430  // Something unusual. Be conservative.
2431  return false;
2432  case X86ISD::SETCC: CCOpNo = 0; break;
2433  case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
2434  case X86ISD::CMOV: CCOpNo = 2; break;
2435  case X86ISD::BRCOND: CCOpNo = 2; break;
2436  }
2437 
2438  X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
2439  if (mayUseCarryFlag(CC))
2440  return false;
2441  }
2442  return true;
2443 }
2444 
2445 /// Check whether or not the chain ending in StoreNode is suitable for doing
2446 /// the {load; op; store} to modify transformation.
 2447 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
 2448                                         SDValue StoredVal, SelectionDAG *CurDAG,
2449  unsigned LoadOpNo,
2450  LoadSDNode *&LoadNode,
2451  SDValue &InputChain) {
2452  // Is the stored value result 0 of the operation?
2453  if (StoredVal.getResNo() != 0) return false;
2454 
2455  // Are there other uses of the operation other than the store?
2456  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2457 
2458  // Is the store non-extending and non-indexed?
2459  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2460  return false;
2461 
2462  SDValue Load = StoredVal->getOperand(LoadOpNo);
2463  // Is the stored value a non-extending and non-indexed load?
2464  if (!ISD::isNormalLoad(Load.getNode())) return false;
2465 
2466  // Return LoadNode by reference.
2467  LoadNode = cast<LoadSDNode>(Load);
2468 
2469  // Is store the only read of the loaded value?
2470  if (!Load.hasOneUse())
2471  return false;
2472 
2473  // Is the address of the store the same as the load?
2474  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2475  LoadNode->getOffset() != StoreNode->getOffset())
2476  return false;
2477 
2478  bool FoundLoad = false;
2479  SmallVector<SDValue, 4> ChainOps;
2480  SmallVector<const SDNode *, 4> LoopWorklist;
 2481  SmallPtrSet<const SDNode *, 16> Visited;
 2482  const unsigned int Max = 1024;
2483 
2484  // Visualization of Load-Op-Store fusion:
2485  // -------------------------
2486  // Legend:
2487  // *-lines = Chain operand dependencies.
2488  // |-lines = Normal operand dependencies.
2489  // Dependencies flow down and right. n-suffix references multiple nodes.
2490  //
2491  // C Xn C
2492  // * * *
2493  // * * *
2494  // Xn A-LD Yn TF Yn
2495  // * * \ | * |
2496  // * * \ | * |
2497  // * * \ | => A--LD_OP_ST
2498  // * * \| \
2499  // TF OP \
2500  // * | \ Zn
2501  // * | \
2502  // A-ST Zn
2503  //
2504 
2505  // This merge induced dependences from: #1: Xn -> LD, OP, Zn
2506  // #2: Yn -> LD
2507  // #3: ST -> Zn
2508 
2509  // Ensure the transform is safe by checking for the dual
2510  // dependencies to make sure we do not induce a loop.
2511 
2512  // As LD is a predecessor to both OP and ST we can do this by checking:
2513  // a). if LD is a predecessor to a member of Xn or Yn.
2514  // b). if a Zn is a predecessor to ST.
2515 
2516  // However, (b) can only occur through being a chain predecessor to
2517  // ST, which is the same as Zn being a member or predecessor of Xn,
2518  // which is a subset of LD being a predecessor of Xn. So it's
2519  // subsumed by check (a).
2520 
2521  SDValue Chain = StoreNode->getChain();
2522 
2523  // Gather X elements in ChainOps.
2524  if (Chain == Load.getValue(1)) {
2525  FoundLoad = true;
2526  ChainOps.push_back(Load.getOperand(0));
2527  } else if (Chain.getOpcode() == ISD::TokenFactor) {
2528  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2529  SDValue Op = Chain.getOperand(i);
2530  if (Op == Load.getValue(1)) {
2531  FoundLoad = true;
2532  // Drop Load, but keep its chain. No cycle check necessary.
2533  ChainOps.push_back(Load.getOperand(0));
2534  continue;
2535  }
2536  LoopWorklist.push_back(Op.getNode());
2537  ChainOps.push_back(Op);
2538  }
2539  }
2540 
2541  if (!FoundLoad)
2542  return false;
2543 
2544  // Worklist is currently Xn. Add Yn to worklist.
2545  for (SDValue Op : StoredVal->ops())
2546  if (Op.getNode() != LoadNode)
2547  LoopWorklist.push_back(Op.getNode());
2548 
2549  // Check (a) if Load is a predecessor to Xn + Yn
2550  if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
2551  true))
2552  return false;
2553 
2554  InputChain =
2555  CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
2556  return true;
2557 }
2558 
2559 // Change a chain of {load; op; store} of the same value into a simple op
2560 // through memory of that value, if the uses of the modified value and its
2561 // address are suitable.
2562 //
 2563 // The tablegen memory operand pattern is currently not able to match
2564 // the case where the EFLAGS on the original operation are used.
2565 //
2566 // To move this to tablegen, we'll need to improve tablegen to allow flags to
2567 // be transferred from a node in the pattern to the result node, probably with
2568 // a new keyword. For example, we have this
2569 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2570 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2571 // (implicit EFLAGS)]>;
2572 // but maybe need something like this
2573 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2574 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2575 // (transferrable EFLAGS)]>;
2576 //
2577 // Until then, we manually fold these and instruction select the operation
2578 // here.
2579 bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
2580  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2581  SDValue StoredVal = StoreNode->getOperand(1);
2582  unsigned Opc = StoredVal->getOpcode();
2583 
2584  // Before we try to select anything, make sure this is memory operand size
2585  // and opcode we can handle. Note that this must match the code below that
2586  // actually lowers the opcodes.
2587  EVT MemVT = StoreNode->getMemoryVT();
2588  if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
2589  MemVT != MVT::i8)
2590  return false;
2591 
2592  bool IsCommutable = false;
2593  switch (Opc) {
2594  default:
2595  return false;
2596  case X86ISD::SUB:
2597  case X86ISD::SBB:
2598  break;
2599  case X86ISD::ADD:
2600  case X86ISD::ADC:
2601  case X86ISD::AND:
2602  case X86ISD::OR:
2603  case X86ISD::XOR:
2604  IsCommutable = true;
2605  break;
2606  }
2607 
2608  unsigned LoadOpNo = 0;
2609  LoadSDNode *LoadNode = nullptr;
2610  SDValue InputChain;
2611  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2612  LoadNode, InputChain)) {
2613  if (!IsCommutable)
2614  return false;
2615 
2616  // This operation is commutable, try the other operand.
2617  LoadOpNo = 1;
2618  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2619  LoadNode, InputChain))
2620  return false;
2621  }
2622 
2623  SDValue Base, Scale, Index, Disp, Segment;
2624  if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
2625  Segment))
2626  return false;
2627 
2628  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
2629  unsigned Opc8) {
2630  switch (MemVT.getSimpleVT().SimpleTy) {
2631  case MVT::i64:
2632  return Opc64;
2633  case MVT::i32:
2634  return Opc32;
2635  case MVT::i16:
2636  return Opc16;
2637  case MVT::i8:
2638  return Opc8;
2639  default:
2640  llvm_unreachable("Invalid size!");
2641  }
2642  };
2643 
2644  MachineSDNode *Result;
2645  switch (Opc) {
2646  case X86ISD::ADD:
2647  case X86ISD::SUB:
2648  // Try to match inc/dec.
2649  if (!Subtarget->slowIncDec() ||
2650  CurDAG->getMachineFunction().getFunction().optForSize()) {
2651  bool IsOne = isOneConstant(StoredVal.getOperand(1));
2652  bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
 2653  // ADD/SUB with 1/-1 can use INC/DEC when the carry flag isn't used.
2654  if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
2655  unsigned NewOpc =
2656  ((Opc == X86ISD::ADD) == IsOne)
2657  ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
2658  : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
2659  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2660  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
2661  MVT::Other, Ops);
2662  break;
2663  }
2664  }
 2665  LLVM_FALLTHROUGH;
 2666  case X86ISD::ADC:
2667  case X86ISD::SBB:
2668  case X86ISD::AND:
2669  case X86ISD::OR:
2670  case X86ISD::XOR: {
2671  auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
2672  switch (Opc) {
2673  case X86ISD::ADD:
2674  return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
2675  X86::ADD8mr);
2676  case X86ISD::ADC:
2677  return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
2678  X86::ADC8mr);
2679  case X86ISD::SUB:
2680  return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
2681  X86::SUB8mr);
2682  case X86ISD::SBB:
2683  return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
2684  X86::SBB8mr);
2685  case X86ISD::AND:
2686  return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
2687  X86::AND8mr);
2688  case X86ISD::OR:
2689  return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
2690  case X86ISD::XOR:
2691  return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
2692  X86::XOR8mr);
2693  default:
2694  llvm_unreachable("Invalid opcode!");
2695  }
2696  };
2697  auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
2698  switch (Opc) {
2699  case X86ISD::ADD:
2700  return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
2701  case X86ISD::ADC:
2702  return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
2703  case X86ISD::SUB:
2704  return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
2705  case X86ISD::SBB:
2706  return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
2707  case X86ISD::AND:
2708  return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
2709  case X86ISD::OR:
2710  return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
2711  case X86ISD::XOR:
2712  return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
2713  default:
2714  llvm_unreachable("Invalid opcode!");
2715  }
2716  };
2717  auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
2718  switch (Opc) {
2719  case X86ISD::ADD:
2720  return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
2721  X86::ADD8mi);
2722  case X86ISD::ADC:
2723  return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
2724  X86::ADC8mi);
2725  case X86ISD::SUB:
2726  return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
2727  X86::SUB8mi);
2728  case X86ISD::SBB:
2729  return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
2730  X86::SBB8mi);
2731  case X86ISD::AND:
2732  return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
2733  X86::AND8mi);
2734  case X86ISD::OR:
2735  return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
2736  X86::OR8mi);
2737  case X86ISD::XOR:
2738  return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
2739  X86::XOR8mi);
2740  default:
2741  llvm_unreachable("Invalid opcode!");
2742  }
2743  };
2744 
2745  unsigned NewOpc = SelectRegOpcode(Opc);
2746  SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
2747 
2748  // See if the operand is a constant that we can fold into an immediate
2749  // operand.
2750  if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
2751  auto OperandV = OperandC->getAPIntValue();
2752 
2753  // Check if we can shrink the operand enough to fit in an immediate (or
2754  // fit into a smaller immediate) by negating it and switching the
2755  // operation.
2756  if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
2757  ((MemVT != MVT::i8 && OperandV.getMinSignedBits() > 8 &&
2758  (-OperandV).getMinSignedBits() <= 8) ||
2759  (MemVT == MVT::i64 && OperandV.getMinSignedBits() > 32 &&
2760  (-OperandV).getMinSignedBits() <= 32)) &&
2761  hasNoCarryFlagUses(StoredVal.getValue(1))) {
2762  OperandV = -OperandV;
2763  Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
2764  }
2765 
2766  // First try to fit this into an Imm8 operand. If it doesn't fit, then try
2767  // the larger immediate operand.
2768  if (MemVT != MVT::i8 && OperandV.getMinSignedBits() <= 8) {
2769  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2770  NewOpc = SelectImm8Opcode(Opc);
2771  } else if (OperandV.getActiveBits() <= MemVT.getSizeInBits() &&
2772  (MemVT != MVT::i64 || OperandV.getMinSignedBits() <= 32)) {
2773  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2774  NewOpc = SelectImmOpcode(Opc);
2775  }
2776  }
2777 
2778  if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
2779  SDValue CopyTo =
2780  CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
2781  StoredVal.getOperand(2), SDValue());
2782 
2783  const SDValue Ops[] = {Base, Scale, Index, Disp,
2784  Segment, Operand, CopyTo, CopyTo.getValue(1)};
2785  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
2786  Ops);
2787  } else {
2788  const SDValue Ops[] = {Base, Scale, Index, Disp,
2789  Segment, Operand, InputChain};
2790  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
2791  Ops);
2792  }
2793  break;
2794  }
2795  default:
2796  llvm_unreachable("Invalid opcode!");
2797  }
2798 
2799  MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
2800  LoadNode->getMemOperand()};
2801  CurDAG->setNodeMemRefs(Result, MemOps);
2802 
2803  // Update Load Chain uses as well.
2804  ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
2805  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
2806  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
2807  CurDAG->RemoveDeadNode(Node);
2808  return true;
2809 }
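Two facts behind the code above are easy to sanity-check in isolation: the fold turns a separate load/modify/store into a single read-modify-write instruction, and an add/sub immediate that does not fit can sometimes be negated into the opposite operation so that it does (+128 does not fit in a signed 8-bit immediate, but -128 does). A hedged sketch of both, with arbitrary values:

#include <cassert>
#include <cstdint>

// A memory-destination update like this is what the fold targets: with
// optimization it typically becomes a single "addl %esi, (%rdi)" instead of
// a load, an add and a store.
void bump(int *slot, int delta) { *slot += delta; }

int main() {
  // Immediate-shrinking trick: x + 128 == x - (-128) in two's complement, and
  // -128 fits in imm8 while +128 does not, so ADD $128 can become SUB $-128.
  uint64_t x = 0x1122334455667788ULL;
  assert(x + 128 == x - static_cast<uint64_t>(-128));

  int slot = 41;
  bump(&slot, 1);
  assert(slot == 42);
  return 0;
}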
2810 
2811 // See if this is an X & Mask that we can match to BEXTR/BZHI.
2812 // Where Mask is one of the following patterns:
2813 // a) x & (1 << nbits) - 1
2814 // b) x & ~(-1 << nbits)
2815 // c) x & (-1 >> (32 - y))
2816 // d) x << (32 - y) >> (32 - y)
2817 bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
2818  assert(
2819  (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
2820  "Should be either an and-mask, or right-shift after clearing high bits.");
2821 
2822  // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one.
2823  if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
2824  return false;
2825 
2826  MVT NVT = Node->getSimpleValueType(0);
2827 
2828  // Only supported for 32 and 64 bits.
2829  if (NVT != MVT::i32 && NVT != MVT::i64)
2830  return false;
2831 
2832  unsigned Size = NVT.getSizeInBits();
2833 
2834  SDValue NBits;
2835 
 2836  // If we have BMI2's BZHI, we are OK with multi-use patterns.
2837  // Else, if we only have BMI1's BEXTR, we require one-use.
2838  const bool CanHaveExtraUses = Subtarget->hasBMI2();
2839  auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) {
2840  return CanHaveExtraUses ||
2841  Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
2842  };
2843  auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
2844  auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
2845 
2846  // a) x & ((1 << nbits) + (-1))
2847  auto matchPatternA = [&checkOneUse, &NBits](SDValue Mask) -> bool {
2848  // Match `add`. Must only have one use!
2849  if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
2850  return false;
2851  // We should be adding all-ones constant (i.e. subtracting one.)
2852  if (!isAllOnesConstant(Mask->getOperand(1)))
2853  return false;
2854  // Match `1 << nbits`. Must only have one use!
2855  SDValue M0 = Mask->getOperand(0);
2856  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
2857  return false;
2858  if (!isOneConstant(M0->getOperand(0)))
2859  return false;
2860  NBits = M0->getOperand(1);
2861  return true;
2862  };
2863 
2864  // b) x & ~(-1 << nbits)
2865  auto matchPatternB = [&checkOneUse, &NBits](SDValue Mask) -> bool {
2866  // Match `~()`. Must only have one use!
2867  if (!isBitwiseNot(Mask) || !checkOneUse(Mask))
2868  return false;
2869  // Match `-1 << nbits`. Must only have one use!
2870  SDValue M0 = Mask->getOperand(0);
2871  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
2872  return false;
2873  if (!isAllOnesConstant(M0->getOperand(0)))
2874  return false;
2875  NBits = M0->getOperand(1);
2876  return true;
2877  };
2878 
2879  // Match potentially-truncated (bitwidth - y)
2880  auto matchShiftAmt = [checkOneUse, Size, &NBits](SDValue ShiftAmt) {
2881  // Skip over a truncate of the shift amount.
2882  if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
2883  ShiftAmt = ShiftAmt.getOperand(0);
2884  // The trunc should have been the only user of the real shift amount.
2885  if (!checkOneUse(ShiftAmt))
2886  return false;
2887  }
2888  // Match the shift amount as: (bitwidth - y). It should go away, too.
2889  if (ShiftAmt.getOpcode() != ISD::SUB)
2890  return false;
2891  auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
2892  if (!V0 || V0->getZExtValue() != Size)
2893  return false;
2894  NBits = ShiftAmt.getOperand(1);
2895  return true;
2896  };
2897 
2898  // c) x & (-1 >> (32 - y))
2899  auto matchPatternC = [&checkOneUse, matchShiftAmt](SDValue Mask) -> bool {
2900  // Match `l>>`. Must only have one use!
2901  if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
2902  return false;
2903  // We should be shifting all-ones constant.
2904  if (!isAllOnesConstant(Mask.getOperand(0)))
2905  return false;
2906  SDValue M1 = Mask.getOperand(1);
2907  // The shift amount should not be used externally.
2908  if (!checkOneUse(M1))
2909  return false;
2910  return matchShiftAmt(M1);
2911  };
2912 
2913  SDValue X;
2914 
2915  // d) x << (32 - y) >> (32 - y)
2916  auto matchPatternD = [&checkOneUse, &checkTwoUse, matchShiftAmt,
2917  &X](SDNode *Node) -> bool {
2918  if (Node->getOpcode() != ISD::SRL)
2919  return false;
2920  SDValue N0 = Node->getOperand(0);
2921  if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
2922  return false;
2923  SDValue N1 = Node->getOperand(1);
2924  SDValue N01 = N0->getOperand(1);
2925  // Both of the shifts must be by the exact same value.
2926  // There should not be any uses of the shift amount outside of the pattern.
2927  if (N1 != N01 || !checkTwoUse(N1))
2928  return false;
2929  if (!matchShiftAmt(N1))
2930  return false;
2931  X = N0->getOperand(0);
2932  return true;
2933  };
2934 
2935  auto matchLowBitMask = [&matchPatternA, &matchPatternB,
2936  &matchPatternC](SDValue Mask) -> bool {
2937  // FIXME: pattern c.
2938  return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
2939  };
2940 
2941  if (Node->getOpcode() == ISD::AND) {
2942  X = Node->getOperand(0);
2943  SDValue Mask = Node->getOperand(1);
2944 
2945  if (matchLowBitMask(Mask)) {
2946  // Great.
2947  } else {
2948  std::swap(X, Mask);
2949  if (!matchLowBitMask(Mask))
2950  return false;
2951  }
2952  } else if (!matchPatternD(Node))
2953  return false;
2954 
2955  SDLoc DL(Node);
2956 
 2957  // If we do *NOT* have BMI2, let's find out if 'X' is *logically*
 2958  // shifted (potentially with a one-use trunc in between),
 2959  // and if so look past the one-use truncation.
2960  MVT XVT = NVT;
2961  if (!Subtarget->hasBMI2() && X.getOpcode() == ISD::TRUNCATE &&
2962  X.hasOneUse() && X.getOperand(0).getOpcode() == ISD::SRL) {
2963  assert(NVT == MVT::i32 && "Expected target valuetype to be i32");
2964  X = X.getOperand(0);
2965  XVT = X.getSimpleValueType();
2966  assert(XVT == MVT::i64 && "Expected truncation from i64");
2967  }
2968 
2969  SDValue OrigNBits = NBits;
2970  if (NBits.getValueType() != XVT) {
2971  // Truncate the shift amount.
2972  NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
2973  insertDAGNode(*CurDAG, OrigNBits, NBits);
2974 
2975  // Insert 8-bit NBits into lowest 8 bits of XVT-sized (32 or 64-bit)
2976  // register. All the other bits are undefined, we do not care about them.
2977  SDValue ImplDef =
2978  SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, XVT), 0);
2979  insertDAGNode(*CurDAG, OrigNBits, ImplDef);
2980  NBits =
2981  CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, XVT, ImplDef, NBits);
2982  insertDAGNode(*CurDAG, OrigNBits, NBits);
2983  }
2984 
2985  if (Subtarget->hasBMI2()) {
 2986  // Great, just emit the BZHI.
2987  SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, XVT, X, NBits);
2988  ReplaceNode(Node, Extract.getNode());
2989  SelectCode(Extract.getNode());
2990  return true;
2991  }
2992 
2993  // Else, emitting BEXTR requires one more step.
2994  // The 'control' of BEXTR has the pattern of:
2995  // [15...8 bit][ 7...0 bit] location
2996  // [ bit count][ shift] name
 2997  // I.e. 0b00000011'00000001 means (x >> 0b1) & 0b111
2998 
2999  // Shift NBits left by 8 bits, thus producing 'control'.
 3000  // This makes the low 8 bits zero.
3001  SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
3002  SDValue Control = CurDAG->getNode(ISD::SHL, DL, XVT, NBits, C8);
3003  insertDAGNode(*CurDAG, OrigNBits, Control);
3004 
3005  // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
3006  if (X.getOpcode() == ISD::SRL) {
3007  SDValue ShiftAmt = X.getOperand(1);
3008  X = X.getOperand(0);
3009 
3010  assert(ShiftAmt.getValueType() == MVT::i8 &&
3011  "Expected shift amount to be i8");
3012 
3013  // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
3014  SDValue OrigShiftAmt = ShiftAmt;
3015  ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, XVT, ShiftAmt);
3016  insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
3017 
3018  // And now 'or' these low 8 bits of shift amount into the 'control'.
3019  Control = CurDAG->getNode(ISD::OR, DL, XVT, Control, ShiftAmt);
3020  insertDAGNode(*CurDAG, OrigNBits, Control);
3021  }
3022 
3023  // And finally, form the BEXTR itself.
3024  SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);
3025 
3026  // The 'X' was originally truncated. Do that now.
3027  if (XVT != NVT) {
3028  insertDAGNode(*CurDAG, OrigNBits, Extract);
3029  Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
3030  }
3031 
3032  ReplaceNode(Node, Extract.getNode());
3033  SelectCode(Extract.getNode());
3034 
3035  return true;
3036 }
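The mask shapes a)–d) listed at the top of matchBitExtract all describe the same operation: keep the low nbits of x. That is exactly what BZHI computes directly and what BEXTR computes with a (length << 8) control word. A standalone check of the equivalences for a 32-bit value (valid for 1 <= n <= 31; the full-width edge cases are handled in the selector through the DAG types):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t x = 0xDEADBEEF;
  for (unsigned n = 1; n < 32; ++n) {
    uint32_t keepLow = x & ((1u << n) - 1);            // pattern a)
    assert(keepLow == (x & ~(~0u << n)));              // pattern b): x & ~(-1 << n)
    assert(keepLow == (x & (~0u >> (32 - n))));        // pattern c): x & (-1 >> (32 - n))
    assert(keepLow == ((x << (32 - n)) >> (32 - n)));  // pattern d)
  }
  return 0;
}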
3037 
3038 // See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
3039 MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
3040  MVT NVT = Node->getSimpleValueType(0);
3041  SDLoc dl(Node);
3042 
3043  SDValue N0 = Node->getOperand(0);
3044  SDValue N1 = Node->getOperand(1);
3045 
3046  // If we have TBM we can use an immediate for the control. If we have BMI
3047  // we should only do this if the BEXTR instruction is implemented well.
3048  // Otherwise moving the control into a register makes this more costly.
3049  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
3050  // hoisting the move immediate would make it worthwhile with a less optimal
3051  // BEXTR?
3052  if (!Subtarget->hasTBM() &&
3053  !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
3054  return nullptr;
3055 
3056  // Must have a shift right.
3057  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
3058  return nullptr;
3059 
3060  // Shift can't have additional users.
3061  if (!N0->hasOneUse())
3062  return nullptr;
3063 
3064  // Only supported for 32 and 64 bits.
3065  if (NVT != MVT::i32 && NVT != MVT::i64)
3066  return nullptr;
3067 
3068  // Shift amount and RHS of and must be constant.
3069  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
3070  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
3071  if (!MaskCst || !ShiftCst)
3072  return nullptr;
3073 
3074  // And RHS must be a mask.
3075  uint64_t Mask = MaskCst->getZExtValue();
3076  if (!isMask_64(Mask))
3077  return nullptr;
3078 
3079  uint64_t Shift = ShiftCst->getZExtValue();
3080  uint64_t MaskSize = countPopulation(Mask);
3081 
3082  // Don't interfere with something that can be handled by extracting AH.
3083  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
3084  if (Shift == 8 && MaskSize == 8)
3085  return nullptr;
3086 
3087  // Make sure we are only using bits that were in the original value, not
3088  // shifted in.
3089  if (Shift + MaskSize > NVT.getSizeInBits())
3090  return nullptr;
3091 
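  // The control word is Shift | (MaskSize << 8); e.g. (x >> 4) & 0xFF uses
  // control 0x0804.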
3092  SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
3093  unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
3094  unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
3095 
3096  // BMI requires the immediate to be placed in a register.
3097  if (!Subtarget->hasTBM()) {
3098  ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
3099  MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
3100  unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
3101  New = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, New), 0);
3102  }
3103 
3104  MachineSDNode *NewNode;
3105  SDValue Input = N0->getOperand(0);
3106  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3107  if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3108  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
3109  SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
3110  NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3111  // Update the chain.
3112  ReplaceUses(Input.getValue(1), SDValue(NewNode, 1));
3113  // Record the mem-refs
3114  CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
3115  } else {
3116  NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, Input, New);
3117  }
3118 
3119  return NewNode;
3120 }
3121 
3122 // Emit a PCMPISTR(I/M) instruction.
3123 MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
3124  bool MayFoldLoad, const SDLoc &dl,
3125  MVT VT, SDNode *Node) {
3126  SDValue N0 = Node->getOperand(0);
3127  SDValue N1 = Node->getOperand(1);
3128  SDValue Imm = Node->getOperand(2);
3129  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3130  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3131 
3132  // Try to fold a load. No need to check alignment.
3133  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3134  if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3135  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3136  N1.getOperand(0) };
3137  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
3138  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3139  // Update the chain.
3140  ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
3141  // Record the mem-refs
3142  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3143  return CNode;
3144  }
3145 
3146  SDValue Ops[] = { N0, N1, Imm };
3147  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
3148  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3149  return CNode;
3150 }
3151 
3152 // Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we need
3153 // to emit a second instruction after this one. This is needed since we have two
3154 // copyToReg nodes glued before this and we need to continue that glue through.
3155 MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
3156  bool MayFoldLoad, const SDLoc &dl,
3157  MVT VT, SDNode *Node,
3158  SDValue &InFlag) {
3159  SDValue N0 = Node->getOperand(0);
3160  SDValue N2 = Node->getOperand(2);
3161  SDValue Imm = Node->getOperand(4);
3162  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3163  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3164 
3165  // Try to fold a load. No need to check alignment.
3166  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3167  if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3168  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3169  N2.getOperand(0), InFlag };
3170  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
3171  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3172  InFlag = SDValue(CNode, 3);
3173  // Update the chain.
3174  ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
3175  // Record the mem-refs
3176  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
3177  return CNode;
3178  }
3179 
3180  SDValue Ops[] = { N0, N2, Imm, InFlag };
3181  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
3182  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3183  InFlag = SDValue(CNode, 2);
3184  return CNode;
3185 }
3186 
3187 bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3188  EVT VT = N->getValueType(0);
3189 
3190  // Only handle scalar shifts.
3191  if (VT.isVector())
3192  return false;
3193 
3194  // Narrower shifts only mask to 5 bits in hardware.
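  // (x86 masks 8/16/32-bit shift amounts with 0x1F and 64-bit amounts with 0x3F.)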
3195  unsigned Size = VT == MVT::i64 ? 64 : 32;
3196 
3197  SDValue OrigShiftAmt = N->getOperand(1);
3198  SDValue ShiftAmt = OrigShiftAmt;
3199  SDLoc DL(N);
3200 
3201  // Skip over a truncate of the shift amount.
3202  if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
3203  ShiftAmt = ShiftAmt->getOperand(0);
3204 
3205  // This function is called after X86DAGToDAGISel::matchBitExtract(),
3206  // so we are not afraid that we might mess up a BZHI/BEXTR pattern.
3207 
3208  SDValue NewShiftAmt;
3209  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3210  SDValue Add0 = ShiftAmt->getOperand(0);
3211  SDValue Add1 = ShiftAmt->getOperand(1);
3212  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3213  // to avoid the ADD/SUB.
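  // E.g. a 64-bit shift by (X + 64) is equivalent to a shift by X, since the
  // hardware masks the amount to 6 bits.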
3214  if (isa<ConstantSDNode>(Add1) &&
3215  cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
3216  NewShiftAmt = Add0;
3217  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3218  // generate a NEG instead of a SUB of a constant.
3219  } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3220  isa<ConstantSDNode>(Add0) &&
3221  cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
3222  cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
3223  // Insert a negate op.
3224  // TODO: This isn't guaranteed to replace the sub if there is a logic cone
3225  // that uses it that's not a shift.
3226  EVT SubVT = ShiftAmt.getValueType();
3227  SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
3228  SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
3229  NewShiftAmt = Neg;
3230 
3231  // Insert these operands into a valid topological order so they can
3232  // get selected independently.
3233  insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
3234  insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
3235  } else
3236  return false;
3237  } else
3238  return false;
3239 
3240  if (NewShiftAmt.getValueType() != MVT::i8) {
3241  // Need to truncate the shift amount.
3242  NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
3243  // Add to a correct topological ordering.
3244  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3245  }
3246 
3247  // Insert a new mask to keep the shift amount legal. This should be removed
3248  // by isel patterns.
3249  NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
3250  CurDAG->getConstant(Size - 1, DL, MVT::i8));
3251  // Place in a correct topological ordering.
3252  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3253 
3254  SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
3255  NewShiftAmt);
3256  if (UpdatedNode != N) {
3257  // If we found an existing node, we should replace ourselves with that node
3258  // and wait for it to be selected after its other users.
3259  ReplaceNode(N, UpdatedNode);
3260  return true;
3261  }
3262 
3263  // If the original shift amount is now dead, delete it so that we don't run
3264  // it through isel.
3265  if (OrigShiftAmt.getNode()->use_empty())
3266  CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
3267 
3268  // Now that we've optimized the shift amount, defer to normal isel to get
3269  // load folding and legacy vs BMI2 selection without repeating it here.
3270  SelectCode(N);
3271  return true;
3272 }
3273 
3274 /// If the high bits of an 'and' operand are known zero, try setting the
3275 /// high bits of an 'and' constant operand to produce a smaller encoding by
3276 /// creating a small, sign-extended negative immediate rather than a large
3277 /// positive one. This reverses a transform in SimplifyDemandedBits that
3278 /// shrinks mask constants by clearing bits. There is also a possibility that
3279 /// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
3280 /// case, just replace the 'and'. Return 'true' if the node is replaced.
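/// For example, if the upper 16 bits of the other operand are known zero, an
/// 'and' with 0x0000FFF0 (a 4-byte immediate) can instead use 0xFFFFFFF0, i.e.
/// -16, which fits in a 1-byte sign-extended immediate.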
3281 bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
3282  // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
3283  // have immediate operands.
3284  MVT VT = And->getSimpleValueType(0);
3285  if (VT != MVT::i32 && VT != MVT::i64)
3286  return false;
3287 
3288  auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
3289  if (!And1C)
3290  return false;
3291 
3292  // Bail out if the mask constant is already negative. It can't shrink any more.
3293  // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel
3294  // patterns to use a 32-bit and instead of a 64-bit and by relying on the
3295  // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
3296  // are negative too.
3297  APInt MaskVal = And1C->getAPIntValue();
3298  unsigned MaskLZ = MaskVal.countLeadingZeros();
3299  if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
3300  return false;
3301 
3302  // Don't extend into the upper 32 bits of a 64 bit mask.
3303  if (VT == MVT::i64 && MaskLZ >= 32) {
3304  MaskLZ -= 32;
3305  MaskVal = MaskVal.trunc(32);
3306  }
3307 
3308  SDValue And0 = And->getOperand(0);
3309  APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
3310  APInt NegMaskVal = MaskVal | HighZeros;
3311 
3312  // If a negative constant would not allow a smaller encoding, there's no need
3313  // to continue. Only change the constant when we know it's a win.
3314  unsigned MinWidth = NegMaskVal.getMinSignedBits();
3315  if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32))
3316  return false;
3317 
3318  // Extend masks if we truncated above.
3319  if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
3320  NegMaskVal = NegMaskVal.zext(64);
3321  HighZeros = HighZeros.zext(64);
3322  }
3323 
3324  // The variable operand must be all zeros in the top bits to allow using the
3325  // new, negative constant as the mask.
3326  if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
3327  return false;
3328 
3329  // Check if the mask is -1. In that case, this is an unnecessary instruction
3330  // that escaped earlier analysis.
3331  if (NegMaskVal.isAllOnesValue()) {
3332  ReplaceNode(And, And0.getNode());
3333  return true;
3334  }
3335 
3336  // A negative mask allows a smaller encoding. Create a new 'and' node.
3337  SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
3338  SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
3339  ReplaceNode(And, NewAnd.getNode());
3340  SelectCode(NewAnd.getNode());
3341  return true;
3342 }
3343 
3344 void X86DAGToDAGISel::Select(SDNode *Node) {
3345  MVT NVT = Node->getSimpleValueType(0);
3346  unsigned Opcode = Node->getOpcode();
3347  SDLoc dl(Node);
3348 
3349  if (Node->isMachineOpcode()) {
3350  LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
3351  Node->setNodeId(-1);
3352  return; // Already selected.
3353  }
3354 
3355  switch (Opcode) {
3356  default: break;
3357  case ISD::BRIND: {
3358  if (Subtarget->isTargetNaCl())
3359  // NaCl has its own pass where jmp %r32 instructions are converted to jmp %r64. We
3360  // leave the instruction alone.
3361  break;
3362  if (Subtarget->isTarget64BitILP32()) {
3363  // Converts a 32-bit register to a 64-bit, zero-extended version of
3364  // it. This is needed because x86-64 can do many things, but jmp %r32
3365  // ain't one of them.
3366  const SDValue &Target = Node->getOperand(1);
3368  SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
3369  SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
3370  Node->getOperand(0), ZextTarget);
3371  ReplaceNode(Node, Brind.getNode());
3372  SelectCode(ZextTarget.getNode());
3373  SelectCode(Brind.getNode());
3374  return;
3375  }
3376  break;
3377  }
3378  case X86ISD::GlobalBaseReg:
3379  ReplaceNode(Node, getGlobalBaseReg());
3380  return;
3381 
3382  case ISD::BITCAST:
3383  // Just drop all 128/256/512-bit bitcasts.
3384  if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
3385  NVT == MVT::f128) {
3386  ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
3387  CurDAG->RemoveDeadNode(Node);
3388  return;
3389  }
3390  break;
3391 
3392  case X86ISD::BLENDV: {
3393  // BLENDV selects like a regular VSELECT.
3394  SDValue VSelect = CurDAG->getNode(
3395  ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
3396  Node->getOperand(1), Node->getOperand(2));
3397  ReplaceNode(Node, VSelect.getNode());
3398  SelectCode(VSelect.getNode());
3399  // We already called ReplaceUses.
3400  return;
3401  }
3402 
3403  case ISD::SRL:
3404  if (matchBitExtract(Node))
3405  return;
3406  LLVM_FALLTHROUGH;
3407  case ISD::SRA:
3408  case ISD::SHL:
3409  if (tryShiftAmountMod(Node))
3410  return;
3411  break;
3412 
3413  case ISD::AND:
3414  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
3415  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
3416  CurDAG->RemoveDeadNode(Node);
3417  return;
3418  }
3419  if (matchBitExtract(Node))
3420  return;
3421  if (AndImmShrink && shrinkAndImmediate(Node))
3422  return;
3423 
3424  LLVM_FALLTHROUGH;
3425  case ISD::OR:
3426  case ISD::XOR: {
3427 
3428  // For operations of the form (x << C1) op C2, check if we can use a smaller
3429  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
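  // E.g. (x << 8) | 0x1200 becomes ((x | 0x12) << 8): 0x1200 needs a 32-bit
  // immediate, while 0x12 fits in a sign-extended 8-bit immediate.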
3430  SDValue N0 = Node->getOperand(0);
3431  SDValue N1 = Node->getOperand(1);
3432 
3433  if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
3434  break;
3435 
3436  // i8 is unshrinkable, i16 should be promoted to i32.
3437  if (NVT != MVT::i32 && NVT != MVT::i64)
3438  break;
3439 
3440  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
3441  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
3442  if (!Cst || !ShlCst)
3443  break;
3444 
3445  int64_t Val = Cst->getSExtValue();
3446  uint64_t ShlVal = ShlCst->getZExtValue();
3447 
3448  // Make sure that we don't change the operation by removing bits.
3449  // This only matters for OR and XOR; AND is unaffected.
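  // E.g. (x << 4) | 0x13 must be rejected: rewriting it as ((x | 0x1) << 4)
  // would drop the low bits 0b11 of the constant.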
3450  uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
3451  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
3452  break;
3453 
3454  unsigned ShlOp, AddOp, Op;
3455  MVT CstVT = NVT;
3456 
3457  // Check the minimum bitwidth for the new constant.
3458  // TODO: AND32ri is the same as AND64ri32 with zext imm.
3459  // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
3460  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
3461  if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
3462  CstVT = MVT::i8;
3463  else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
3464  CstVT = MVT::i32;
3465 
3466  // Bail if there is no smaller encoding.
3467  if (NVT == CstVT)
3468  break;
3469 
3470  switch (NVT.SimpleTy) {
3471  default: llvm_unreachable("Unsupported VT!");
3472  case MVT::i32:
3473  assert(CstVT == MVT::i8);
3474  ShlOp = X86::SHL32ri;
3475  AddOp = X86::ADD32rr;
3476 
3477  switch (Opcode) {
3478  default: llvm_unreachable("Impossible opcode");
3479  case ISD::AND: Op = X86::AND32ri8; break;
3480  case ISD::OR: Op = X86::OR32ri8; break;
3481  case ISD::XOR: Op = X86::XOR32ri8; break;
3482  }
3483  break;
3484  case MVT::i64:
3485  assert(CstVT == MVT::i8 || CstVT == MVT::i32);
3486  ShlOp = X86::SHL64ri;
3487  AddOp = X86::ADD64rr;
3488 
3489  switch (Opcode) {
3490  default: llvm_unreachable("Impossible opcode");
3491  case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
3492  case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
3493  case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
3494  }
3495  break;
3496  }
3497 
3498  // Emit the smaller op and the shift.
3499  SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT);
3500  SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
3501  if (ShlVal == 1)
3502  CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0),
3503  SDValue(New, 0));
3504  else
3505  CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
3506  getI8Imm(ShlVal, dl));
3507  return;
3508  }
3509  case X86ISD::SMUL:
3510  // i16/i32/i64 are handled with isel patterns.
3511  if (NVT != MVT::i8)
3512  break;
3513  LLVM_FALLTHROUGH;
3514  case X86ISD::UMUL: {
3515  SDValue N0 = Node->getOperand(0);
3516  SDValue N1 = Node->getOperand(1);
3517 
3518  unsigned LoReg, ROpc, MOpc;
3519  switch (NVT.SimpleTy) {
3520  default: llvm_unreachable("Unsupported VT!");
3521  case MVT::i8:
3522  LoReg = X86::AL;
3523  ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
3524  MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
3525  break;
3526  case MVT::i16:
3527  LoReg = X86::AX;
3528  ROpc = X86::MUL16r;
3529  MOpc = X86::MUL16m;
3530  break;
3531  case MVT::i32:
3532  LoReg = X86::EAX;
3533  ROpc = X86::MUL32r;
3534  MOpc = X86::MUL32m;
3535  break;
3536  case MVT::i64:
3537  LoReg = X86::RAX;
3538  ROpc = X86::MUL64r;
3539  MOpc = X86::MUL64m;
3540  break;
3541  }
3542 
3543  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3544  bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3545  // Multiply is commutative.
3546  if (!FoldedLoad) {
3547  FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3548  if (FoldedLoad)
3549  std::swap(N0, N1);
3550  }
3551 
3552  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
3553  N0, SDValue()).getValue(1);
3554 
3555  MachineSDNode *CNode;
3556  if (FoldedLoad) {
3557  // i16/i32/i64 use an instruction that produces a low and high result even
3558  // though only the low result is used.
3559  SDVTList VTs;
3560  if (NVT == MVT::i8)
3561  VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
3562  else
3563  VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
3564 
3565  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
3566  InFlag };
3567  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3568 
3569  // Update the chain.
3570  ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
3571  // Record the mem-refs
3572  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3573  } else {
3574  // i16/i32/i64 use an instruction that produces a low and high result even
3575  // though only the low result is used.
3576  SDVTList VTs;
3577  if (NVT == MVT::i8)
3578  VTs = CurDAG->getVTList(NVT, MVT::i32);
3579  else
3580  VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
3581 
3582  CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
3583  }
3584 
3585  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
3586  ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
3587  CurDAG->RemoveDeadNode(Node);
3588  return;
3589  }
3590 
3591  case ISD::SMUL_LOHI:
3592  case ISD::UMUL_LOHI: {
3593  SDValue N0 = Node->getOperand(0);
3594  SDValue N1 = Node->getOperand(1);
3595 
3596  unsigned Opc, MOpc;
3597  bool isSigned = Opcode == ISD::SMUL_LOHI;
3598  if (!isSigned) {
3599  switch (NVT.SimpleTy) {
3600  default: llvm_unreachable("Unsupported VT!");
3601  case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
3602  case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
3603  }
3604  } else {
3605  switch (NVT.SimpleTy) {
3606  default: llvm_unreachable("Unsupported VT!");
3607  case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
3608  case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
3609  }
3610  }
3611 
3612  unsigned SrcReg, LoReg, HiReg;
3613  switch (Opc) {
3614  default: llvm_unreachable("Unknown MUL opcode!");
3615  case X86::IMUL32r:
3616  case X86::MUL32r:
3617  SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
3618  break;
3619  case X86::IMUL64r:
3620  case X86::MUL64r:
3621  SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
3622  break;
3623  }
3624 
3625  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3626  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3627  // Multiply is commutative.
3628  if (!foldedLoad) {
3629  foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3630  if (foldedLoad)
3631  std::swap(N0, N1);
3632  }
3633 
3634  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
3635  N0, SDValue()).getValue(1);
3636  if (foldedLoad) {
3637  SDValue Chain;
3638  MachineSDNode *CNode = nullptr;
3639  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
3640  InFlag };
3641  SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3642  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3643  Chain = SDValue(CNode, 0);
3644  InFlag = SDValue(CNode, 1);
3645 
3646  // Update the chain.
3647  ReplaceUses(N1.getValue(1), Chain);
3648  // Record the mem-refs
3649  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3650  } else {
3651  SDValue Ops[] = { N1, InFlag };
3652  SDVTList VTs = CurDAG->getVTList(MVT::Glue);
3653  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3654  InFlag = SDValue(CNode, 0);
3655  }
3656 
3657  // Copy the low half of the result, if it is needed.
3658  if (!SDValue(Node, 0).use_empty()) {
3659  assert(LoReg && "Register for low half is not defined!");
3660  SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
3661  NVT, InFlag);
3662  InFlag = ResLo.getValue(2);
3663  ReplaceUses(SDValue(Node, 0), ResLo);
3664  LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
3665  dbgs() << '\n');
3666  }
3667  // Copy the high half of the result, if it is needed.
3668  if (!SDValue(Node, 1).use_empty()) {
3669  assert(HiReg && "Register for high half is not defined!");
3670  SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
3671  NVT, InFlag);
3672  InFlag = ResHi.getValue(2);
3673  ReplaceUses(SDValue(Node, 1), ResHi);
3674  LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
3675  dbgs() << '\n');
3676  }
3677 
3678  CurDAG->RemoveDeadNode(Node);
3679  return;
3680  }
3681 
3682  case ISD::SDIVREM:
3683  case ISD::UDIVREM: {
3684  SDValue N0 = Node->getOperand(0);
3685  SDValue N1 = Node->getOperand(1);
3686 
3687  unsigned Opc, MOpc;
3688  bool isSigned = Opcode == ISD::SDIVREM;
3689  if (!isSigned) {
3690  switch (NVT.SimpleTy) {
3691  default: llvm_unreachable("Unsupported VT!");
3692  case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
3693  case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
3694  case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
3695  case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
3696  }
3697  } else {
3698  switch (NVT.SimpleTy) {
3699  default: llvm_unreachable("Unsupported VT!");
3700  case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
3701  case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
3702  case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
3703  case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
3704  }
3705  }
3706 
3707  unsigned LoReg, HiReg, ClrReg;
3708  unsigned SExtOpcode;
3709  switch (NVT.SimpleTy) {
3710  default: llvm_unreachable("Unsupported VT!");
3711  case MVT::i8:
3712  LoReg = X86::AL; ClrReg = HiReg = X86::AH;
3713  SExtOpcode = X86::CBW;
3714  break;
3715  case MVT::i16:
3716  LoReg = X86::AX; HiReg = X86::DX;
3717  ClrReg = X86::DX;
3718  SExtOpcode = X86::CWD;
3719  break;
3720  case MVT::i32:
3721  LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
3722  SExtOpcode = X86::CDQ;
3723  break;
3724  case MVT::i64:
3725  LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
3726  SExtOpcode = X86::CQO;
3727  break;
3728  }
3729 
3730  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3731  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3732  bool signBitIsZero = CurDAG->SignBitIsZero(N0);
3733 
3734  SDValue InFlag;
3735  if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
3736  // Special case for div8, just use a move with zero extension to AX to
3737  // clear the upper 8 bits (AH).
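  // The 8-bit divide takes its dividend from AX (AH:AL), so a MOVZX into EAX
  // loads AL and zeroes AH in one instruction.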
3738  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
3739  MachineSDNode *Move;
3740  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3741  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
3742  Move = CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
3743  MVT::Other, Ops);
3744  Chain = SDValue(Move, 1);
3745  ReplaceUses(N0.getValue(1), Chain);
3746  // Record the mem-refs
3747  CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
3748  } else {
3749  Move = CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0);
3750  Chain = CurDAG->getEntryNode();
3751  }
3752  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, SDValue(Move, 0),
3753  SDValue());
3754  InFlag = Chain.getValue(1);
3755  } else {
3756  InFlag =
3757  CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
3758  LoReg, N0, SDValue()).getValue(1);
3759  if (isSigned && !signBitIsZero) {
3760  // Sign extend the low part into the high part.
3761  InFlag =
3762  SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
3763  } else {
3764  // Zero out the high part, effectively zero extending the input.
3765  SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
3766  switch (NVT.SimpleTy) {
3767  case MVT::i16:
3768  ClrNode =
3769  SDValue(CurDAG->getMachineNode(
3770  TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
3771  CurDAG->getTargetConstant(X86::sub_16bit, dl,
3772  MVT::i32)),
3773  0);
3774  break;
3775  case MVT::i32:
3776  break;
3777  case MVT::i64:
3778  ClrNode =
3779  SDValue(CurDAG->getMachineNode(
3780  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
3781  CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
3782  CurDAG->getTargetConstant(X86::sub_32bit, dl,
3783  MVT::i32)),
3784  0);
3785  break;
3786  default:
3787  llvm_unreachable("Unexpected division source");
3788  }
3789 
3790  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
3791  ClrNode, InFlag).getValue(1);
3792  }
3793  }
3794 
3795  if (foldedLoad) {
3796  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
3797  InFlag };
3798  MachineSDNode *CNode =
3799  CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
3800  InFlag = SDValue(CNode, 1);
3801  // Update the chain.
3802  ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
3803  // Record the mem-refs
3804  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3805  } else {
3806  InFlag =
3807  SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
3808  }
3809 
3810  // Prevent use of AH in a REX instruction by explicitly copying it to
3811  // an ABCD_L register.
3812  //
3813  // The current assumption of the register allocator is that isel
3814  // won't generate explicit references to the GR8_ABCD_H registers. If
3815  // the allocator and/or the backend get enhanced to be more robust in
3816  // that regard, this can be, and should be, removed.
3817  if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
3818  SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
3819  unsigned AHExtOpcode =
3820  isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
3821 
3822  SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
3823  MVT::Glue, AHCopy, InFlag);
3824  SDValue Result(RNode, 0);
3825  InFlag = SDValue(RNode, 1);
3826 
3827  Result =
3828  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
3829 
3830  ReplaceUses(SDValue(Node, 1), Result);
3831  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
3832  dbgs() << '\n');
3833  }
3834  // Copy the division (low) result, if it is needed.
3835  if (!SDValue(Node, 0).use_empty()) {
3836  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
3837  LoReg, NVT, InFlag);
3838  InFlag = Result.getValue(2);
3839  ReplaceUses(SDValue(Node, 0), Result);
3840  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
3841  dbgs() << '\n');
3842  }
3843  // Copy the remainder (high) result, if it is needed.
3844  if (!SDValue(Node, 1).use_empty()) {
3845  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
3846  HiReg, NVT, InFlag);
3847  InFlag = Result.getValue(2);
3848  ReplaceUses(SDValue(Node, 1), Result);
3849  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
3850  dbgs() << '\n');
3851  }
3852  CurDAG->RemoveDeadNode(Node);
3853  return;
3854  }
3855 
3856  case X86ISD::CMP: {
3857  SDValue N0 = Node->getOperand(0);
3858  SDValue N1 = Node->getOperand(1);
3859 
3860  // Optimizations for TEST compares.
3861  if (!isNullConstant(N1))
3862  break;
3863 
3864  // Save the original VT of the compare.
3865  MVT CmpVT = N0.getSimpleValueType();
3866 
3867  // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
3868  // by a test instruction. The test should be removed later by
3869  // analyzeCompare if we are using only the zero flag.
3870  // TODO: Should we check the users and use the BEXTR flags directly?
3871  if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
3872  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
3873  unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
3874  : X86::TEST32rr;
3875  SDValue BEXTR = SDValue(NewNode, 0);
3876  NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
3877  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
3878  CurDAG->RemoveDeadNode(Node);
3879  return;
3880  }
3881  }
3882 
3883  // We can peek through truncates, but we need to be careful below.
3884  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
3885  N0 = N0.getOperand(0);
3886 
3887  // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
3888  // use a smaller encoding.
3889  // Look past the truncate if CMP is the only use of it.
3890  if (N0.getOpcode() == ISD::AND &&
3891  N0.getNode()->hasOneUse() &&
3892  N0.getValueType() != MVT::i8) {
3893  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3894  if (!C) break;
3895  uint64_t Mask = C->getZExtValue();
3896 
3897  // Check if we can replace AND+IMM64 with a shift. This is possible for
3898  // masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
3899  // flag.
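  // E.g. (X & 0xFFFFFFFF00000000) == 0 can be tested as (X >> 32) == 0, and
  // (X & 0x0000FFFFFFFFFFFF) == 0 as (X << 16) == 0, with no 64-bit immediate.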
3900  if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&
3901  onlyUsesZeroFlag(SDValue(Node, 0))) {
3902  if (isMask_64(~Mask)) {
3903  unsigned TrailingZeros = countTrailingZeros(Mask);
3904  SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
3905  SDValue Shift =
3906  SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64,
3907  N0.getOperand(0), Imm), 0);
3908  MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
3909  MVT::i32, Shift, Shift);
3910  ReplaceNode(Node, Test);
3911  return;
3912  }
3913  if (isMask_64(Mask)) {
3914  unsigned LeadingZeros = countLeadingZeros(Mask);
3915  SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
3916  SDValue Shift =
3917  SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64,
3918  N0.getOperand(0), Imm), 0);
3919  MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
3920  MVT::i32, Shift, Shift);
3921  ReplaceNode(Node, Test);
3922  return;
3923  }
3924  }
3925 
3926  MVT VT;
3927  int SubRegOp;
3928  unsigned ROpc, MOpc;
3929 
3930  // For each of these checks we need to be careful if the sign flag is
3931  // being used. It is only safe to use the sign flag in two conditions,
3932  // either the sign bit in the shrunken mask is zero or the final test
3933  // size is equal to the original compare size.
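  // E.g. narrowing (testl $0x80, %eax) to (testb $0x80, %al) can change SF,
  // since SF would then come from bit 7 of the result instead of bit 31.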
3934 
3935  if (isUInt<8>(Mask) &&
3936  (!(Mask & 0x80) || CmpVT == MVT::i8 ||
3937  hasNoSignFlagUses(SDValue(Node, 0)))) {
3938  // For example, convert "testl %eax, $8" to "testb %al, $8"
3939  VT = MVT::i8;
3940  SubRegOp = X86::sub_8bit;
3941  ROpc = X86::TEST8ri;
3942  MOpc = X86::TEST8mi;
3943  } else if (OptForMinSize && isUInt<16>(Mask) &&
3944  (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
3945  hasNoSignFlagUses(SDValue(Node, 0)))) {
3946  // For example, "testl %eax, $32776" to "testw %ax, $32776".
3947  // NOTE: We only want to form TESTW instructions if optimizing for
3948  // min size. Otherwise we only save one byte and possibly get a length
3949  // changing prefix penalty in the decoders.
3950  VT = MVT::i16;
3951  SubRegOp = X86::sub_16bit;
3952  ROpc = X86::TEST16ri;
3953  MOpc = X86::TEST16mi;
3954  } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
3955  ((!(Mask & 0x80000000) &&
3956  // Without minsize 16-bit Cmps can get here so we need to
3957  // be sure we calculate the correct sign flag if needed.
3958  (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
3959  CmpVT == MVT::i32 ||
3960  hasNoSignFlagUses(SDValue(Node, 0)))) {
3961  // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
3962  // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
3963  // Otherwise, we find ourselves in a position where we have to do
3964  // promotion. If previous passes did not promote the and, we assume
3965  // they had a good reason not to and do not promote here.
3966  VT = MVT::i32;
3967  SubRegOp = X86::sub_32bit;
3968  ROpc = X86::TEST32ri;
3969  MOpc = X86::TEST32mi;
3970  } else {
3971  // No eligible transformation was found.
3972  break;
3973  }
3974 
3975  // FIXME: We should be able to fold loads here.
3976 
3977  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
3978  SDValue Reg = N0.getOperand(0);
3979 
3980  // Emit a testl or testw.
3981  MachineSDNode *NewNode;
3982  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3983  if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3984  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3985  Reg.getOperand(0) };
3986  NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
3987  // Update the chain.
3988  ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
3989  // Record the mem-refs
3990  CurDAG->setNodeMemRefs(NewNode,
3991  {cast<LoadSDNode>(Reg)->getMemOperand()});
3992  } else {
3993  // Extract the subregister if necessary.
3994  if (N0.getValueType() != VT)
3995  Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
3996 
3997  NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
3998  }
3999  // Replace CMP with TEST.
4000  ReplaceNode(Node, NewNode);
4001  return;
4002  }
4003  break;
4004  }
4005  case X86ISD::PCMPISTR: {
4006  if (!Subtarget->hasSSE42())
4007  break;
4008 
4009  bool NeedIndex = !SDValue(Node, 0).use_empty();
4010  bool NeedMask = !SDValue(Node, 1).use_empty();
4011  // We can't fold a load if we are going to make two instructions.
4012  bool MayFoldLoad = !NeedIndex || !NeedMask;
4013 
4014  MachineSDNode *CNode;
4015  if (NeedMask) {
4016  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr;
4017  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm;
4018  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
4019  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
4020  }
4021  if (NeedIndex || !NeedMask) {
4022  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr;
4023  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm;
4024  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
4025  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4026  }
4027 
4028  // Connect the flag usage to the last instruction created.
4029  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
4030  CurDAG->RemoveDeadNode(Node);
4031  return;
4032  }
4033  case X86ISD::PCMPESTR: {
4034  if (!Subtarget->hasSSE42())
4035  break;
4036 
4037  // Copy the two implicit register inputs.
4038  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
4039  Node->getOperand(1),
4040  SDValue()).getValue(1);
4041  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
4042  Node->getOperand(3), InFlag).getValue(1);
4043 
4044  bool NeedIndex = !SDValue(Node, 0).use_empty();
4045  bool NeedMask = !SDValue(Node, 1).use_empty();
4046  // We can't fold a load if we are going to make two instructions.
4047  bool MayFoldLoad = !NeedIndex || !NeedMask;
4048 
4049  MachineSDNode *CNode;
4050  if (NeedMask) {
4051  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr;
4052  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm;
4053  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node,
4054  InFlag);
4055  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
4056  }
4057  if (NeedIndex || !NeedMask) {
4058  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr;
4059  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm;
4060  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag);
4061  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4062  }
4063  // Connect the flag usage to the last instruction created.
4064  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
4065  CurDAG->RemoveDeadNode(Node);
4066  return;
4067  }
4068 
4069  case ISD::STORE:
4070  if (foldLoadStoreIntoMemOperand(Node))
4071  return;
4072  break;
4073  }
4074 
4075  SelectCode(Node);
4076 }
4077 
4078 bool X86DAGToDAGISel::
4079 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4080  std::vector<SDValue> &OutOps) {
4081  SDValue Op0, Op1, Op2, Op3, Op4;
4082  switch (ConstraintID) {
4083  default:
4084  llvm_unreachable("Unexpected asm memory constraint");
4085  case InlineAsm::Constraint_i:
4086  // FIXME: It seems strange that 'i' is needed here since it's supposed to
4087  // be an immediate and not a memory constraint.
4088  LLVM_FALLTHROUGH;
4089  case InlineAsm::Constraint_o: // offsetable ??
4090  case InlineAsm::Constraint_v: // not offsetable ??
4091  case InlineAsm::Constraint_m: // memory
4092  case InlineAsm::Constraint_X:
4093  if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
4094  return true;
4095  break;
4096  }
4097 
4098  OutOps.push_back(Op0);
4099  OutOps.push_back(Op1);
4100  OutOps.push_back(Op2);
4101  OutOps.push_back(Op3);
4102  OutOps.push_back(Op4);
4103  return false;
4104 }
4105 
4106 /// This pass converts a legalized DAG into a X86-specific DAG,
4107 /// ready for instruction scheduling.
4108 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
4109  CodeGenOpt::Level OptLevel) {
4110  return new X86DAGToDAGISel(TM, OptLevel);
4111 }
uint64_t CallInst * C
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:571
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:349
X = FP_ROUND(Y, TRUNC) - Rounding &#39;Y&#39; from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:538
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq)
Return true if call address is a load and it can be moved below CALLSEQ_START and the chains leading ...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode, SDValue StoredVal, SelectionDAG *CurDAG, unsigned LoadOpNo, LoadSDNode *&LoadNode, SDValue &InputChain)
Check whether or not the chain ending in StoreNode is suitable for doing the {load; op; store} to mod...
EVT getValueType() const
Return the ValueType of the referenced return value.
Vector comparison generating mask bits for fp and integer signed and unsigned data types...
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const SDValue & getOffset() const
bool isUndef() const
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:35
APInt getSignedMax() const
Return the largest signed value contained in the ConstantRange.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:42
This class represents lattice values for constants.
Definition: AllocatorList.h:24
static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N)
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget)
Tail call return.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:42
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
bool isVector() const
Return true if this is a vector value type.
const SDValue & getBasePtr() const
CondCode getCondFromCMovOpc(unsigned Opc)
Return condition code of a CMov opcode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:858
X86 conditional moves.
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1204
unsigned Reg
CondCode getCondFromSETOpc(unsigned Opc)
Return condition code of a SET opcode.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
bool hasFastBEXTR() const
Definition: X86Subtarget.h:639
const SDValue & getChain() const
constexpr bool isInt< 8 >(int64_t x)
Definition: MathExtras.h:303
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:131
unsigned getAlignment() const
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:321
bool isBitwiseNot(SDValue V)
Returns true if V is a bitwise not operation.
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:811
STATISTIC(NumFunctions, "Total number of functions")
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
F(f)
void setNodeId(int Id)
Set unique node id.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1 at the ...
Definition: ISDOpcodes.h:353
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:189
static bool MayFoldLoad(SDValue Op)
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1509
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:159
GlobalBaseReg - On Darwin, this node represents the result of the mflr at function entry...
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode *> &Visited, SmallVectorImpl< const SDNode *> &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:210
X86 compare and logical compare instructions.
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4298
The address of a basic block.
Definition: Constants.h:840
static cl::opt< bool > AndImmShrink("x86-and-imm-shrink", cl::init(true), cl::desc("Enable setting constant bits to reduce size of mask immediates"), cl::Hidden)
bool hasOneUse() const
Return true if there is exactly one use of this node.
A description of a memory reference used in the backend.
Dynamic (non-constant condition) vector blend where only the sign bits of the condition elements are ...
Shift and rotation operations.
Definition: ISDOpcodes.h:410
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:478
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef...
CallLoweringInfo & setChain(SDValue InChain)
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:170
op_iterator op_end() const
uint64_t getConstantOperandVal(unsigned i) const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
CondCode getCondFromBranchOpc(unsigned Opc)
SimpleValueType SimpleTy
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:304
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence, and carry arbitrary information that target might want to know.
Definition: ISDOpcodes.h:713
Position
Position to insert a new instruction relative to an existing instruction.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
This represents a list of ValueType&#39;s that has been intern&#39;d by a SelectionDAG.
bool hasVLX() const
Definition: X86Subtarget.h:657
unsigned getSizeInBits() const
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasExternalLinkage() const
Definition: GlobalValue.h:422
int64_t getSExtValue() const
static void InvalidateNodeId(SDNode *N)
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
X86 FP SETCC, similar to above, but with output as an i1 mask and with optional rounding mode...
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:411
#define T
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:429
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
op_iterator op_begin() const
static X86::CondCode getCondFromOpc(unsigned Opc)
ArrayRef< SDUse > ops() const
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
This class is used to represent ISD::STORE nodes.
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:636
#define P(N)
const SDValue & getBasePtr() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:166
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:343
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:120
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
LOCAL_RECOVER - Represents the llvm.localrecover intrinsic.
Definition: ISDOpcodes.h:81
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM)
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
bool isMachineOpcode() const
static bool mayUseCarryFlag(X86::CondCode CC)
unsigned getScalarSizeInBits() const
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1185
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
const SDValue & getOperand(unsigned Num) const
X86 conditional branches.
bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const SDValue & getOffset() const
APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:161
This class provides iterator support for SDUse operands that use a specific SDNode.
unsigned getMachineOpcode() const
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:598
void RepositionNode(allnodes_iterator Position, SDNode *N)
Move node N in the AllNodes list to be immediately before the given iterator Position.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using &#39;From&#39; to use &#39;To&#39; instead.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
On Darwin, this node represents the result of the popl at function entry, used for PIC code...
self_iterator getIterator()
Definition: ilist_node.h:82
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM)
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:556
std::vector< ArgListEntry > ArgListTy
Extended Value Type.
Definition: ValueTypes.h:34
This structure contains all information that is necessary for lowering calls.
bool isVolatile() const
APInt getSignedMin() const
Return the smallest signed value contained in the ConstantRange.
This class contains a discriminated union of information about pointers in memory operands...
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
BaseType
A given derived pointer can have multiple base pointers through phi/selects.
bool use_empty() const
Return true if there are no uses of this node.
These operations represent an abstract X86 call instruction, which includes a bunch of information...
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
void dump() const
Dump this node, for debugging.
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:520
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM, const X86Subtarget &Subtarget)
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:309
bool isScalarFPTypeInSSEReg(EVT VT) const
Return true if the specified scalar FP type is computed in an SSE register, not on the X87 floating p...
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:222
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
const DataFlowGraph & G
Definition: RDFGraph.cpp:211
An SDNode that represents everything that will be needed to construct a MachineInstr.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM)
EVT getMemoryVT() const
Return the type of the in-memory value.
Target - Wrapper for Target specific information.
Class for arbitrary precision integers.
Definition: APInt.h:70
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
Definition: APInt.h:1309
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:468
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:471
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:394
static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User)
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:187
int getNodeId() const
Return the unique node id.
static bool isDispSafeForFrameIndex(int64_t Val)
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:423
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
const SDValue & getValue() const
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:387
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:206
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:182
A wrapper node for TargetConstantPool, TargetJumpTable, TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, MCSymbol and TargetBlockAddress.
FunctionPass * createX86ISelDag(X86TargetMachine &TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into a X86-specific DAG, ready for instruction scheduling...
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:614
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:595
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
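dyn_cast is how code like this inspects a node that may or may not be a constant; a hedged sketch (the helper name getConstantValue is made up for illustration):

#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// Hypothetical helper: if N wraps a ConstantSDNode, report its
// zero-extended value; otherwise leave Out untouched.
static bool getConstantValue(SDValue N, uint64_t &Out) {
  // dyn_cast returns nullptr when the node is not a ConstantSDNode,
  // so the type check and the cast happen in one step.
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getZExtValue();
    return true;
  }
  return false;
}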
bool is512BitVector() const
Return true if this is a 512-bit vector type.
uint32_t Size
Definition: Profile.cpp:47
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
unsigned getOpcode() const
SDValue getValue(unsigned R) const
XOP - Opcode prefix used by XOP instructions.
Definition: X86BaseInfo.h:527
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:346
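isUInt<16> checks that a 64-bit value fits in 16 unsigned bits, which is how immediate-size limits are tested; a standalone illustration (not from this file):

#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  assert(llvm::isUInt<16>(65535));  // largest value representable in 16 bits
  assert(!llvm::isUInt<16>(65536)); // one past the top no longer fits
  return 0;
}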
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:175
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
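SelectionDAG::getConstant is how lowering code materializes an immediate node; a hedged sketch, assuming a SelectionDAG and SDLoc are in scope as they are throughout this file (the helper name is illustrative):

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Sketch only: makeImm32 is a made-up helper.
static SDValue makeImm32(SelectionDAG &DAG, const SDLoc &DL, uint64_t Imm) {
  // isTarget=true yields a TargetConstant node, which instruction
  // selection treats as a final, already-selected immediate.
  return DAG.getConstant(Imm, DL, MVT::i32, /*isTarget=*/true);
}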
const MachinePointerInfo & getPointerInfo() const
This class is used to form a handle around another node that is persistent and is updated across invo...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getReg() const
unsigned getMinSignedBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1552
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
bool hasBMI() const
Definition: X86Subtarget.h:592
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
unsigned getResNo() const
get the index which selects a specific result in the SDNode
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
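isNullConstant and isAllOnesConstant (a few entries above) are convenience predicates over SDValue; a hedged sketch of how a fold might use them (the function name is illustrative):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Illustrative only: classify the mask operand of an AND.
// Returns 0 for x & 0, 1 for x & -1 (a no-op mask), 2 otherwise.
static int classifyAndMask(SDValue Mask) {
  if (isNullConstant(Mask))
    return 0; // the whole AND folds to zero
  if (isAllOnesConstant(Mask))
    return 1; // the AND is redundant
  return 2;
}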
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
bool isNonTemporal() const
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:443
unsigned countLeadingZeros() const
The APInt version of the countLeadingZeros functions in MathExtras.h.
Definition: APInt.h:1596
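APInt::countLeadingZeros counts zero bits from the most significant end of an arbitrary-width value; a standalone illustration (not from this file), also touching getMinSignedBits mentioned above:

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  // A 32-bit APInt holding the value 1: the top 31 bits are zero.
  llvm::APInt One(32, 1);
  assert(One.countLeadingZeros() == 31);

  // -1 needs only a single bit when interpreted as a signed value.
  llvm::APInt AllOnes = llvm::APInt::getAllOnesValue(32);
  assert(AllOnes.getMinSignedBits() == 1);
  return 0;
}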
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
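computeKnownBits is the query that mask and address-mode folds use to prove bits are zero; a hedged sketch, assuming a SelectionDAG is in scope (the helper name is made up):

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Sketch only: returns true if DAG can prove the low Bits bits of V are zero.
static bool lowBitsKnownZero(SelectionDAG &DAG, SDValue V, unsigned Bits) {
  KnownBits Known = DAG.computeKnownBits(V);
  // Known.Zero has a set bit for every bit of V proven to be zero.
  return Known.Zero.countTrailingOnes() >= Bits;
}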
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, bool hasSymbolicDisplacement=true)
Returns true if the given offset can fit into the displacement field of the instruction.
unsigned getNumOperands() const
const SDValue & getOperand(unsigned i) const
uint64_t getZExtValue() const
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:474
#define LLVM_DEBUG(X)
Definition: Debug.h:123
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
bool hasTBM() const
Definition: X86Subtarget.h:585
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:375
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, SDValue Call, SDValue OrigChain)
Replace the original chain operand of the call with load's chain operand and move load below the call...
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:242
Special wrapper used under X86-64 PIC mode for RIP relative displacements.
BRIND - Indirect branch.
Definition: ISDOpcodes.h:634
This class is used to represent ISD::LOAD nodes.
static int getUninvalidatedNodeId(SDNode *N)