//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<AMDGPUPerfHintAnalysis>();
    AU.addRequired<LegacyDivergenceAnalysis>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  bool isVGPRImm(const SDNode *N) const;
  bool isUniformLoad(const SDNode *N) const;
  bool isUniformBr(const SDNode *N) const;

  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;

  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectHi16Elt(SDValue In, SDValue &Src) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;
protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII = Subtarget->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
      !Subtarget->ldsRequiresM0Init())
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}
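
// Editorial sketch of the effect, assuming an LDS access on a subtarget
// where ldsRequiresM0Init() is true. A node such as
//   t10: ch = store<LDS> t0, t5, t7
// is morphed so its operand list gains the glue result of a CopyToReg that
// writes the maximum value to m0, tying the m0 init to the DS operation:
//   t11: ch,glue = CopyToReg t0, Register:i32 $m0, Constant:i32<-1>
//   t10': ch = store<LDS> t0, t5, t7, t11:1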

MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}
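
// Editorial note, an illustrative expansion: buildSMovImm64 with
// Imm = 0x0000000200000001 produces roughly
//   %lo = S_MOV_B32 1
//   %hi = S_MOV_B32 2
//   %v  = REG_SEQUENCE SReg_64, %lo, sub0, %hi, sub1
// with the low 32 bits of the immediate in sub0 and the high 32 bits in sub1.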

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}
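
// Editorial note: e.g. a 4-element vector of 32-bit elements selects
// SReg_128 (four SGPRs); the class width is always NumVectorElts * 32 bits,
// which is why only the power-of-two element counts up to 16 are handled.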

static bool getConstantValue(SDValue N, uint32_t &Out) {
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getZExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                "supported yet");
  // 16 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}
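
// Editorial note: for example, (build_vector a, b, c, d) : v4i32 with
// RegClassID = SReg_128RegClassID yields RegSeqArgs =
//   { SReg_128, a, sub0, b, sub1, c, sub2, d, sub3 }
// and selects to one REG_SEQUENCE; a scalar_to_vector fills the tail
// elements with IMPLICIT_DEF.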

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
    N = glueCopyToM0(N);

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          uint32_t K = LHSVal | (RHSVal << 16);
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE:
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE: {
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}
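
// Editorial note, an illustrative expansion of a 64-bit add: (addc x, y)
// becomes roughly
//   %x0 = EXTRACT_SUBREG %x, sub0     %x1 = EXTRACT_SUBREG %x, sub1
//   %y0 = EXTRACT_SUBREG %y, sub0     %y1 = EXTRACT_SUBREG %y, sub1
//   %lo = S_ADD_U32 %x0, %y0          ; produces the carry
//   %hi = S_ADDC_U32 %x1, %y1         ; consumes the carry via glue
//   %r  = REG_SEQUENCE SReg_64, %lo, sub0, %hi, sub1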

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading: v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}
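
// Editorial note: for example, with OffsetBits == 16 an offset of 65535 is
// encodable and 65536 is not; on Southern Islands (without the unsafe-fold
// flag) the base must additionally have a provably-zero sign bit before
// the offset may be folded.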

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
              AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
              AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
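
// Editorial note, an example of the intended use (assuming a ds_read2_b32):
// a constant address of 40 selects base = (V_MOV_B32 0), offset0 = 10,
// offset1 = 11, i.e. dwords 10 and 11 from a zero base; the offsets are in
// 4-byte units and each is limited to 8 bits.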

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instructions.
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
        Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
        APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    if ((IsSigned && isInt<13>(COffsetVal)) ||
        (!IsSigned && isUInt<12>(COffsetVal))) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}
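
// Editorial note: for example, with hasFlatInstOffsets() an address
// (add %p, 1000) folds to VAddr = %p, Offset = 1000, since 1000 fits the
// unsigned 12-bit field (max 4095); with IsSigned, offsets down to -4096
// fit the signed 13-bit field instead.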

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {
  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}
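
// Editorial note (encoding details vary by generation): a byte offset of 40
// is encoded directly as 40 on VI, but as the dword count 10 on SI/CI; when
// the offset does not fit the immediate field it is materialized into an
// SGPR with S_MOV_B32 and Imm is set to false.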

SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
  if (Addr.getValueType() != MVT::i32)
    return Addr;

  // Zero-extend a 32-bit address.
  SDLoc SL(Addr);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);

  const SDValue Ops[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    Addr,
    CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
            0),
    CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
  };

  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
                                        Ops), 0);
}
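
// Editorial sketch of the expansion: assuming get32BitAddressHighBits()
// returns some value H, a 32-bit address %a becomes
//   REG_SEQUENCE SReg_64_XEXEC, %a, sub0, (S_MOV_B32 H), sub1
// i.e. the 64-bit base expected by s_load, with the high word supplied by
// SIMachineFunctionInfo rather than by sign extension.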

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);

  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
  // wraparound, because s_load instructions perform the addition in 64 bits.
  if ((Addr.getValueType() != MVT::i32 ||
       Addr->getFlags().hasNoUnsignedWrap()) &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = Expand32BitAddress(N0);
      return true;
    }
  }
  SBase = Expand32BitAddress(Addr);
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    // Don't peel off the offset (c0) if doing so could possibly lead
    // the base (n0) to be negative.
    if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
      return true;
    }
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}
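
// Editorial worked example: Offset = 16, Width = 8 packs to
// 16 | (8 << 16) = 0x80010, so S_BFE_U32 %x, 0x80010 extracts bits [23:16]
// of %x into the low bits of the result.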

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "((a << b) srl c)" ---> "BFE_U32 a, (c - b), (32 - c)"
  // "((a << b) sra c)" ---> "BFE_I32 a, (c - b), (32 - c)"
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}
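
// Editorial note: e.g. ((x << 8) srl 24) has b = 8, c = 24, satisfying
// 0 < b <= c < 32, and becomes S_BFE_U32 x with offset c - b = 16 and
// width 32 - c = 8, i.e. it extracts bits [23:16] of x.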

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And.getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const GCNSubtarget *>(Subtarget);

    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
  SDLoc SL(N);

  if (!UseSCCBr) {
    // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
    // analyzed what generates the vcc value, so we do not know whether vcc
    // bits for disabled lanes are 0. Thus we need to mask out bits for
    // disabled lanes.
    //
    // For the case that we select S_CBRANCH_SCC1 and it gets
    // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
    // SIInstrInfo::moveToVALU, which inserts the S_AND.
    //
    // We could add an analysis of what generates the vcc value here and omit
    // the S_AND when it is unnecessary. But it would be better to add a
    // separate pass after SIFixSGPRCopies to do the unnecessary S_AND removal,
    // so it catches both cases.
    Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                                          CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                                          Cond),
                   0);
  }

  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}

void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  bool IsFMA = N->getOpcode() == ISD::FMA;
  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
                         !Subtarget->hasFmaMixInsts()) ||
      ((IsFMA && Subtarget->hasMadMixInsts()) ||
       (!IsFMA && Subtarget->hasFmaMixInsts()))) {
    SelectCode(N);
    return;
  }

  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  SDValue Src2 = N->getOperand(2);
  unsigned Src0Mods, Src1Mods, Src2Mods;

  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
  // using the conversion from f16.
  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);

  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
         "fmad selected with denormals enabled");
  // TODO: We can select this with f32 denormals enabled if all the sources are
  // converted from f16 (in which case fmad isn't legal).

  if (Sel0 || Sel1 || Sel2) {
    // For dummy operands.
    SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
    SDValue Ops[] = {
      CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
      CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
      CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
      CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
      Zero, Zero
    };

    CurDAG->SelectNodeTo(N,
                         IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
                         MVT::f32, Ops);
  } else {
    SelectCode(N);
  }
}
1698 
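// [Editorial sketch, not in the original file] A source shape that profits
// from the mix selection above: fma(fpext a, fpext b, c) with a and b of
// type half folds both extends into op_sel_hi source modifiers and selects
// as a single V_FMA_MIX_F32 instead of two conversions plus an FMA.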
1699 // This is here because there isn't a way to use the generated sub0_sub1 as the
1700 // subreg index to EXTRACT_SUBREG in tablegen.
1701 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
1702  MemSDNode *Mem = cast<MemSDNode>(N);
1703  unsigned AS = Mem->getAddressSpace();
1704  if (AS == AMDGPUAS::FLAT_ADDRESS) {
1705  SelectCode(N);
1706  return;
1707  }
1708 
1709  MVT VT = N->getSimpleValueType(0);
1710  bool Is32 = (VT == MVT::i32);
1711  SDLoc SL(N);
1712 
1713  MachineSDNode *CmpSwap = nullptr;
1714  if (Subtarget->hasAddr64()) {
1715  SDValue SRsrc, VAddr, SOffset, Offset, SLC;
1716 
1717  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
1718  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
1719  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
1720  SDValue CmpVal = Mem->getOperand(2);
1721 
1722  // XXX - Do we care about glue operands?
1723 
1724  SDValue Ops[] = {
1725  CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1726  };
1727 
1728  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1729  }
1730  }
1731 
1732  if (!CmpSwap) {
1733  SDValue SRsrc, SOffset, Offset, SLC;
1734  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
1735  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
1736  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
1737 
1738  SDValue CmpVal = Mem->getOperand(2);
1739  SDValue Ops[] = {
1740  CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1741  };
1742 
1743  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1744  }
1745  }
1746 
1747  if (!CmpSwap) {
1748  SelectCode(N);
1749  return;
1750  }
1751 
1752  MachineMemOperand *MMO = Mem->getMemOperand();
1753  CurDAG->setNodeMemRefs(CmpSwap, {MMO});
1754 
1755  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
1756  SDValue Extract
1757  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
1758 
1759  ReplaceUses(SDValue(N, 0), Extract);
1760  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
1761  CurDAG->RemoveDeadNode(N);
1762 }
1763 
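// [Editorial sketch, not in the original file] The *_RTN cmpswap
// instructions take the {new, cmp} values as a register pair and write the
// pre-existing memory value back into the low half, which is why the code
// above extracts sub0 (32-bit) or sub0_sub1 (64-bit) from the result.
// Standalone model of the returned value:
static uint32_t cmpSwapModel(uint32_t &Mem, uint32_t Cmp, uint32_t New) {
  uint32_t Old = Mem;  // the hardware returns the prior memory contents
  if (Old == Cmp)
    Mem = New;         // swap only on a successful compare
  return Old;          // lands in the low subregister of the result pair
}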
1764 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
1765  unsigned &Mods) const {
1766  Mods = 0;
1767  Src = In;
1768 
1769  if (Src.getOpcode() == ISD::FNEG) {
1770  Mods |= SISrcMods::NEG;
1771  Src = Src.getOperand(0);
1772  }
1773 
1774  if (Src.getOpcode() == ISD::FABS) {
1775  Mods |= SISrcMods::ABS;
1776  Src = Src.getOperand(0);
1777  }
1778 
1779  return true;
1780 }
1781 
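// [Editorial sketch, not in the original file] Composition of the folded
// modifiers: for fneg(fabs(x)) the checks above return Src = x with NEG and
// ABS both set, which hardware evaluates as -|x| (abs first, then neg).
// Standalone model (uses std::fabs from <cmath>):
static float applySrcModsModel(float X, unsigned Mods) {
  float V = (Mods & SISrcMods::ABS) ? std::fabs(X) : X; // abs applied first
  return (Mods & SISrcMods::NEG) ? -V : V;              // then negation
}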
1782 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1783  SDValue &SrcMods) const {
1784  unsigned Mods;
1785  if (SelectVOP3ModsImpl(In, Src, Mods)) {
1786  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1787  return true;
1788  }
1789 
1790  return false;
1791 }
1792 
1793 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
1794  SDValue &SrcMods) const {
1795  SelectVOP3Mods(In, Src, SrcMods);
1796  return isNoNanSrc(Src);
1797 }
1798 
1799 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
1800  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
1801  return false;
1802 
1803  Src = In;
1804  return true;
1805 }
1806 
1807 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1808  SDValue &SrcMods, SDValue &Clamp,
1809  SDValue &Omod) const {
1810  SDLoc DL(In);
1811  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1812  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1813 
1814  return SelectVOP3Mods(In, Src, SrcMods);
1815 }
1816 
1817 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1818  SDValue &SrcMods,
1819  SDValue &Clamp,
1820  SDValue &Omod) const {
1821  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1822  return SelectVOP3Mods(In, Src, SrcMods);
1823 }
1824 
1825 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
1826  SDValue &Clamp, SDValue &Omod) const {
1827  Src = In;
1828 
1829  SDLoc DL(In);
1830  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1831  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1832 
1833  return true;
1834 }
1835 
1836 static SDValue stripBitcast(SDValue Val) {
1837  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
1838 }
1839 
1840 // Figure out if this is really an extract of the high 16 bits of a dword.
1841 static bool isExtractHiElt(SDValue In, SDValue &Out) {
1842  In = stripBitcast(In);
1843  if (In.getOpcode() != ISD::TRUNCATE)
1844  return false;
1845 
1846  SDValue Srl = In.getOperand(0);
1847  if (Srl.getOpcode() == ISD::SRL) {
1848  if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
1849  if (ShiftAmt->getZExtValue() == 16) {
1850  Out = stripBitcast(Srl.getOperand(0));
1851  return true;
1852  }
1853  }
1854  }
1855 
1856  return false;
1857 }
1858 
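// [Editorial sketch, not in the original file] The recognized DAG shape is
// trunc(srl x, 16), possibly through bitcasts: the value is exactly the
// high 16 bits of a 32-bit register. Standalone model of that computation:
static uint16_t extractHiEltModel(uint32_t Dword) {
  return static_cast<uint16_t>(Dword >> 16);
}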
1859 // Look through operations that obscure an extract of the low 16 bits of the
1860 // same register.
1861 static SDValue stripExtractLoElt(SDValue In) {
1862  if (In.getOpcode() == ISD::TRUNCATE) {
1863  SDValue Src = In.getOperand(0);
1864  if (Src.getValueType().getSizeInBits() == 32)
1865  return stripBitcast(Src);
1866  }
1867 
1868  return In;
1869 }
1870 
1871 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
1872  SDValue &SrcMods) const {
1873  unsigned Mods = 0;
1874  Src = In;
1875 
1876  if (Src.getOpcode() == ISD::FNEG) {
1877  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
1878  Src = Src.getOperand(0);
1879  }
1880 
1881  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
1882  unsigned VecMods = Mods;
1883 
1884  SDValue Lo = stripBitcast(Src.getOperand(0));
1885  SDValue Hi = stripBitcast(Src.getOperand(1));
1886 
1887  if (Lo.getOpcode() == ISD::FNEG) {
1888  Lo = stripBitcast(Lo.getOperand(0));
1889  Mods ^= SISrcMods::NEG;
1890  }
1891 
1892  if (Hi.getOpcode() == ISD::FNEG) {
1893  Hi = stripBitcast(Hi.getOperand(0));
1894  Mods ^= SISrcMods::NEG_HI;
1895  }
1896 
1897  if (isExtractHiElt(Lo, Lo))
1898  Mods |= SISrcMods::OP_SEL_0;
1899 
1900  if (isExtractHiElt(Hi, Hi))
1901  Mods |= SISrcMods::OP_SEL_1;
1902 
1903  Lo = stripExtractLoElt(Lo);
1904  Hi = stripExtractLoElt(Hi);
1905 
1906  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
1907  // Really a scalar input. Just select from the low half of the register to
1908  // avoid packing.
1909 
1910  Src = Lo;
1911  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1912  return true;
1913  }
1914 
1915  Mods = VecMods;
1916  }
1917 
1918  // Packed instructions do not have abs modifiers.
1919  Mods |= SISrcMods::OP_SEL_1;
1920 
1921  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1922  return true;
1923 }
1924 
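// [Editorial sketch, not in the original file] For a packed f16x2 operand,
// the OP_SEL_0 / OP_SEL_1 bits set above choose which half of the 32-bit
// register feeds the low and high lane of the packed operation. Standalone
// model of one half selection (NEG / NEG_HI sign flips omitted):
static uint16_t selectHalfModel(uint32_t Reg, bool UseHiHalf) {
  return static_cast<uint16_t>(UseHiHalf ? Reg >> 16 : Reg & 0xffff);
}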
1925 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
1926  SDValue &SrcMods,
1927  SDValue &Clamp) const {
1928  SDLoc SL(In);
1929 
1930  // FIXME: Handle clamp and op_sel
1931  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1932 
1933  return SelectVOP3PMods(In, Src, SrcMods);
1934 }
1935 
1936 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
1937  SDValue &SrcMods) const {
1938  Src = In;
1939  // FIXME: Handle op_sel
1940  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1941  return true;
1942 }
1943 
1944 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
1945  SDValue &SrcMods,
1946  SDValue &Clamp) const {
1947  SDLoc SL(In);
1948 
1949  // FIXME: Handle clamp
1950  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1951 
1952  return SelectVOP3OpSel(In, Src, SrcMods);
1953 }
1954 
1955 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
1956  SDValue &SrcMods) const {
1957  // FIXME: Handle op_sel
1958  return SelectVOP3Mods(In, Src, SrcMods);
1959 }
1960 
1961 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
1962  SDValue &SrcMods,
1963  SDValue &Clamp) const {
1964  SDLoc SL(In);
1965 
1966  // FIXME: Handle clamp
1967  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1968 
1969  return SelectVOP3OpSelMods(In, Src, SrcMods);
1970 }
1971 
1972 // The return value is not whether the match is possible (which it always is),
1973 // but whether or not a conversion is really used.
1974 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
1975  unsigned &Mods) const {
1976  Mods = 0;
1977  SelectVOP3ModsImpl(In, Src, Mods);
1978 
1979  if (Src.getOpcode() == ISD::FP_EXTEND) {
1980  Src = Src.getOperand(0);
1981  assert(Src.getValueType() == MVT::f16);
1982  Src = stripBitcast(Src);
1983 
1984  // Be careful about folding modifiers if we already have an abs. fneg is
1985  // applied last, so we don't want to apply an earlier fneg.
1986  if ((Mods & SISrcMods::ABS) == 0) {
1987  unsigned ModsTmp;
1988  SelectVOP3ModsImpl(Src, Src, ModsTmp);
1989 
1990  if ((ModsTmp & SISrcMods::NEG) != 0)
1991  Mods ^= SISrcMods::NEG;
1992 
1993  if ((ModsTmp & SISrcMods::ABS) != 0)
1994  Mods |= SISrcMods::ABS;
1995  }
1996 
1997  // op_sel/op_sel_hi decide the source type and source.
1998  // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
1999  // If the source's op_sel is set, it picks the high half of the source
2000  // register.
2001 
2002  Mods |= SISrcMods::OP_SEL_1;
2003  if (isExtractHiElt(Src, Src)) {
2004  Mods |= SISrcMods::OP_SEL_0;
2005 
2006  // TODO: Should we try to look for neg/abs here?
2007  }
2008 
2009  return true;
2010  }
2011 
2012  return false;
2013 }
2014 
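// [Editorial sketch, not in the original file] The op_sel / op_sel_hi
// comment above, expressed as bit selection: which bits of the 32-bit
// source register a v_mad_mix-style source consumes (the f16 -> f32
// conversion itself is omitted):
static uint32_t madMixSrcBitsModel(uint32_t Reg, bool OpSelHi, bool OpSel) {
  if (!OpSelHi)
    return Reg;                // consumed as a full f32 register
  return OpSel ? Reg >> 16     // f16 taken from the high half
               : Reg & 0xffff; // f16 taken from the low half
}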
2015 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2016  SDValue &SrcMods) const {
2017  unsigned Mods = 0;
2018  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2019  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2020  return true;
2021 }
2022 
2023 // TODO: Can we identify things like v_mad_mixhi_f16?
2024 bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
2025  if (In.isUndef()) {
2026  Src = In;
2027  return true;
2028  }
2029 
2030  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2031  SDLoc SL(In);
2032  SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
2033  MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
2034  SL, MVT::i32, K);
2035  Src = SDValue(MovK, 0);
2036  return true;
2037  }
2038 
2039  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2040  SDLoc SL(In);
2041  SDValue K = CurDAG->getTargetConstant(
2042  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2043  MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
2044  SL, MVT::i32, K);
2045  Src = SDValue(MovK, 0);
2046  return true;
2047  }
2048 
2049  return isExtractHiElt(In, Src);
2050 }
2051 
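// [Editorial sketch, not in the original file] For the constant paths above:
// e.g. the f16 constant 1.0 (bit pattern 0x3C00) is materialized as
// V_MOV_B32 0x3C000000, so the payload already sits in the high 16 bits.
static uint32_t hi16ImmModel(uint16_t Imm16) {
  return static_cast<uint32_t>(Imm16) << 16; // shift payload into [31:16]
}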
2052 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2053  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
2054  return false;
2055  }
2056  const SIRegisterInfo *SIRI =
2057  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2058  const SIInstrInfo * SII =
2059  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2060 
2061  unsigned Limit = 0;
2062  bool AllUsesAcceptSReg = true;
2063  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2064  Limit < 10 && U != E; ++U, ++Limit) {
2065  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2066 
2067  // If the register class is unknown, it could be an unknown
2068  // register class that needs to be an SGPR, e.g. an inline asm
2069  // constraint
2070  if (!RC || SIRI->isSGPRClass(RC))
2071  return false;
2072 
2073  if (RC != &AMDGPU::VS_32RegClass) {
2074  AllUsesAcceptSReg = false;
2075  SDNode * User = *U;
2076  if (User->isMachineOpcode()) {
2077  unsigned Opc = User->getMachineOpcode();
2078  MCInstrDesc Desc = SII->get(Opc);
2079  if (Desc.isCommutable()) {
2080  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2081  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2082  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2083  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2084  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2085  if (CommutedRC == &AMDGPU::VS_32RegClass)
2086  AllUsesAcceptSReg = true;
2087  }
2088  }
2089  }
2090  // If AllUsesAcceptSReg is still false, we have not succeeded in
2091  // commuting the current user, which means at least one use
2092  // strictly requires a VGPR. Thus, we will not attempt to commute
2093  // other user instructions.
2094  if (!AllUsesAcceptSReg)
2095  break;
2096  }
2097  }
2098  return !AllUsesAcceptSReg && (Limit < 10);
2099 }
2100 
2101 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2102  auto Ld = cast<LoadSDNode>(N);
2103 
2104  return Ld->getAlignment() >= 4 &&
2105  (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2106  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
2107  !N->isDivergent()) ||
2108  (Subtarget->getScalarizeGlobalBehavior() &&
2109  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2110  !Ld->isVolatile() && !N->isDivergent() &&
2111  static_cast<const SITargetLowering *>(
2112  getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
2124 }
2125 
2126 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2127  const AMDGPUTargetLowering& Lowering =
2128  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2129  bool IsModified = false;
2130  do {
2131  IsModified = false;
2132 
2133  // Go over all selected nodes and try to fold them a bit more
2134  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2135  while (Position != CurDAG->allnodes_end()) {
2136  SDNode *Node = &*Position++;
2137  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2138  if (!MachineNode)
2139  continue;
2140 
2141  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2142  if (ResNode != Node) {
2143  if (ResNode)
2144  ReplaceUses(Node, ResNode);
2145  IsModified = true;
2146  }
2147  }
2148  CurDAG->RemoveDeadNodes();
2149  } while (IsModified);
2150 }
2151 
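// [Editorial sketch, not in the original file] PostprocessISelDAG above is a
// fixed-point iteration: keep re-running the folder until a full pass over
// all nodes makes no change. The same shape in miniature:
template <typename Container, typename FoldFn>
static void foldToFixedPointModel(Container &Nodes, FoldFn Fold) {
  bool Changed;
  do {
    Changed = false;
    for (auto &N : Nodes)
      Changed |= Fold(N); // Fold returns true if it rewrote the node
  } while (Changed);
}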
2152 bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
2153  Subtarget = &MF.getSubtarget<R600Subtarget>();
2154  return SelectionDAGISel::runOnMachineFunction(MF);
2155 }
2156 
2157 bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
2158  if (!N->readMem())
2159  return false;
2160  if (CbId == -1)
2161  return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2162  N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
2163 
2164  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
2165 }
2166 
2167 bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
2168  SDValue& IntPtr) {
2169  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
2170  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
2171  true);
2172  return true;
2173  }
2174  return false;
2175 }
2176 
2177 bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
2178  SDValue& BaseReg, SDValue &Offset) {
2179  if (!isa<ConstantSDNode>(Addr)) {
2180  BaseReg = Addr;
2181  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
2182  return true;
2183  }
2184  return false;
2185 }
2186 
2187 void R600DAGToDAGISel::Select(SDNode *N) {
2188  unsigned int Opc = N->getOpcode();
2189  if (N->isMachineOpcode()) {
2190  N->setNodeId(-1);
2191  return; // Already selected.
2192  }
2193 
2194  switch (Opc) {
2195  default: break;
2196  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2197  case ISD::SCALAR_TO_VECTOR:
2198  case ISD::BUILD_VECTOR: {
2199  EVT VT = N->getValueType(0);
2200  unsigned NumVectorElts = VT.getVectorNumElements();
2201  unsigned RegClassID;
2202  // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2203  // sequence that adds a 128-bit register copy when going through the
2204  // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
2205  // possible because they can't be bundled by our scheduler.
2206  switch(NumVectorElts) {
2207  case 2: RegClassID = R600::R600_Reg64RegClassID; break;
2208  case 4:
2209  if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
2210  RegClassID = R600::R600_Reg128VerticalRegClassID;
2211  else
2212  RegClassID = R600::R600_Reg128RegClassID;
2213  break;
2214  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2215  }
2216  SelectBuildVector(N, RegClassID);
2217  return;
2218  }
2219  }
2220 
2221  SelectCode(N);
2222 }
2223 
2224 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2225  SDValue &Offset) {
2226  ConstantSDNode *C;
2227  SDLoc DL(Addr);
2228 
2229  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2230  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2231  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2232  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2233  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2234  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2235  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2236  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2237  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2238  Base = Addr.getOperand(0);
2239  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2240  } else {
2241  Base = Addr;
2242  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2243  }
2244 
2245  return true;
2246 }
2247 
2248 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2249  SDValue &Offset) {
2250  ConstantSDNode *IMMOffset;
2251 
2252  if (Addr.getOpcode() == ISD::ADD
2253  && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2254  && isInt<16>(IMMOffset->getZExtValue())) {
2255 
2256  Base = Addr.getOperand(0);
2257  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2258  MVT::i32);
2259  return true;
2260  // If the pointer address is constant, we can move it to the offset field.
2261  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2262  && isInt<16>(IMMOffset->getZExtValue())) {
2263  Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2264  SDLoc(CurDAG->getEntryNode()),
2265  R600::ZERO, MVT::i32);
2266  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2267  MVT::i32);
2268  return true;
2269  }
2270 
2271  // Default case, no offset
2272  Base = Addr;
2273  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2274  return true;
2275 }
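// [Editorial sketch, not in the original file] The outcomes of the matcher
// above on concrete addresses, as a standalone model (names illustrative
// only): a (base + imm16) address splits, anything else keeps a zero offset.
struct VTXAddrModel { uint64_t Base; int32_t Offset; };
static VTXAddrModel splitVTXAddrModel(uint64_t Base, int64_t Imm) {
  if (Imm >= INT16_MIN && Imm <= INT16_MAX)
    return {Base, static_cast<int32_t>(Imm)}; // offset fits in signed 16 bits
  return {Base + static_cast<uint64_t>(Imm), 0}; // fold back into the base
}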