LLVM 8.0.1
SIInstrInfo.cpp
1 //===- SIInstrInfo.cpp - SI Instruction Information ----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// SI Implementation of TargetInstrInfo.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "SIInstrInfo.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUIntrinsicInfo.h"
18 #include "AMDGPUSubtarget.h"
19 #include "GCNHazardRecognizer.h"
20 #include "SIDefines.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "SIRegisterInfo.h"
24 #include "Utils/AMDGPUBaseInfo.h"
25 #include "llvm/ADT/APInt.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/StringRef.h"
48 #include "llvm/IR/DebugLoc.h"
49 #include "llvm/IR/DiagnosticInfo.h"
50 #include "llvm/IR/Function.h"
51 #include "llvm/IR/InlineAsm.h"
52 #include "llvm/IR/LLVMContext.h"
53 #include "llvm/MC/MCInstrDesc.h"
54 #include "llvm/Support/Casting.h"
55 #include "llvm/Support/CommandLine.h"
56 #include "llvm/Support/Compiler.h"
61 #include <cassert>
62 #include <cstdint>
63 #include <iterator>
64 #include <utility>
65 
66 using namespace llvm;
67 
68 #define GET_INSTRINFO_CTOR_DTOR
69 #include "AMDGPUGenInstrInfo.inc"
70 
71 namespace llvm {
72 namespace AMDGPU {
73 #define GET_D16ImageDimIntrinsics_IMPL
74 #define GET_ImageDimIntrinsicTable_IMPL
75 #define GET_RsrcIntrinsics_IMPL
76 #include "AMDGPUGenSearchableTables.inc"
77 }
78 }
79 
80 
81 // Must be at least 4 to be able to branch over minimum unconditional branch
82 // code. This is only for making it possible to write reasonably small tests for
83 // long branches.
84 static cl::opt<unsigned>
85 BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
86  cl::desc("Restrict range of branch instructions (DEBUG)"));
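// For example, passing -amdgpu-s-branch-bits=4 to llc forces almost every
// branch out of range, so the long-branch expansion below can be exercised
// with very small test cases.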
87 
88 SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
89   : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
90  RI(ST), ST(ST) {}
91 
92 //===----------------------------------------------------------------------===//
93 // TargetInstrInfo callbacks
94 //===----------------------------------------------------------------------===//
95 
96 static unsigned getNumOperandsNoGlue(SDNode *Node) {
97  unsigned N = Node->getNumOperands();
98  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
99  --N;
100  return N;
101 }
102 
103 static SDValue findChainOperand(SDNode *Load) {
104   SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
105  assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
106  return LastOp;
107 }
108 
109 /// Returns true if both nodes have the same value for the given
110 /// operand \p Op, or if both nodes do not have this operand.
111 static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
112  unsigned Opc0 = N0->getMachineOpcode();
113  unsigned Opc1 = N1->getMachineOpcode();
114 
115  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
116  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);
117 
118  if (Op0Idx == -1 && Op1Idx == -1)
119  return true;
120 
121 
122  if ((Op0Idx == -1 && Op1Idx != -1) ||
123  (Op1Idx == -1 && Op0Idx != -1))
124  return false;
125 
126  // getNamedOperandIdx returns the index for the MachineInstr's operands,
127  // which includes the result as the first operand. We are indexing into the
128  // MachineSDNode's operands, so we need to skip the result operand to get
129  // the real index.
130  --Op0Idx;
131  --Op1Idx;
132 
133  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
134 }
135 
136 bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
137   AliasAnalysis *AA) const {
138  // TODO: The generic check fails for VALU instructions that should be
139  // rematerializable due to implicit reads of exec. We really want all of the
140  // generic logic for this except for this.
141  switch (MI.getOpcode()) {
142  case AMDGPU::V_MOV_B32_e32:
143  case AMDGPU::V_MOV_B32_e64:
144  case AMDGPU::V_MOV_B64_PSEUDO:
145  return true;
146  default:
147  return false;
148  }
149 }
150 
151 bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
152   int64_t &Offset0,
153  int64_t &Offset1) const {
154  if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
155  return false;
156 
157  unsigned Opc0 = Load0->getMachineOpcode();
158  unsigned Opc1 = Load1->getMachineOpcode();
159 
160  // Make sure both are actually loads.
161  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
162  return false;
163 
164  if (isDS(Opc0) && isDS(Opc1)) {
165 
166  // FIXME: Handle this case:
167  if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
168  return false;
169 
170  // Check base reg.
171  if (Load0->getOperand(1) != Load1->getOperand(1))
172  return false;
173 
174  // Check chain.
175  if (findChainOperand(Load0) != findChainOperand(Load1))
176  return false;
177 
178  // Skip read2 / write2 variants for simplicity.
179   // TODO: We should report true if the used offsets are adjacent (excluding
180   // st64 versions).
181  if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
182  AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
183  return false;
184 
185  Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
186  Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
187  return true;
188  }
189 
190  if (isSMRD(Opc0) && isSMRD(Opc1)) {
191  // Skip time and cache invalidation instructions.
192  if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 ||
193  AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1)
194  return false;
195 
196     assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));
197 
198  // Check base reg.
199  if (Load0->getOperand(0) != Load1->getOperand(0))
200  return false;
201 
202  const ConstantSDNode *Load0Offset =
203  dyn_cast<ConstantSDNode>(Load0->getOperand(1));
204  const ConstantSDNode *Load1Offset =
205  dyn_cast<ConstantSDNode>(Load1->getOperand(1));
206 
207  if (!Load0Offset || !Load1Offset)
208  return false;
209 
210  // Check chain.
211  if (findChainOperand(Load0) != findChainOperand(Load1))
212  return false;
213 
214  Offset0 = Load0Offset->getZExtValue();
215  Offset1 = Load1Offset->getZExtValue();
216  return true;
217  }
218 
219  // MUBUF and MTBUF can access the same addresses.
220  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {
221 
222  // MUBUF and MTBUF have vaddr at different indices.
223  if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
224  findChainOperand(Load0) != findChainOperand(Load1) ||
225  !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
226  !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
227  return false;
228 
229  int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
230  int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
231 
232  if (OffIdx0 == -1 || OffIdx1 == -1)
233  return false;
234 
235  // getNamedOperandIdx returns the index for MachineInstrs. Since they
236   // include the output in the operand list, but SDNodes don't, we need to
237  // subtract the index by one.
238  --OffIdx0;
239  --OffIdx1;
240 
241  SDValue Off0 = Load0->getOperand(OffIdx0);
242  SDValue Off1 = Load1->getOperand(OffIdx1);
243 
244  // The offset might be a FrameIndexSDNode.
245  if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
246  return false;
247 
248  Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
249  Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
250  return true;
251  }
252 
253  return false;
254 }
255 
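// The read2st64/write2st64 forms address LDS with a stride of 64 elements,
// so their immediate offsets are scaled by an extra factor of 64 in
// getMemOperandWithOffset below.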
256 static bool isStride64(unsigned Opc) {
257  switch (Opc) {
258  case AMDGPU::DS_READ2ST64_B32:
259  case AMDGPU::DS_READ2ST64_B64:
260  case AMDGPU::DS_WRITE2ST64_B32:
261  case AMDGPU::DS_WRITE2ST64_B64:
262  return true;
263  default:
264  return false;
265  }
266 }
267 
268 bool SIInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
269   MachineOperand *&BaseOp,
270  int64_t &Offset,
271  const TargetRegisterInfo *TRI) const {
272  unsigned Opc = LdSt.getOpcode();
273 
274  if (isDS(LdSt)) {
275  const MachineOperand *OffsetImm =
276  getNamedOperand(LdSt, AMDGPU::OpName::offset);
277  if (OffsetImm) {
278  // Normal, single offset LDS instruction.
279  BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
280  Offset = OffsetImm->getImm();
281  assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
282  "operands of type register.");
283  return true;
284  }
285 
286  // The 2 offset instructions use offset0 and offset1 instead. We can treat
287  // these as a load with a single offset if the 2 offsets are consecutive. We
288  // will use this for some partially aligned loads.
289  const MachineOperand *Offset0Imm =
290  getNamedOperand(LdSt, AMDGPU::OpName::offset0);
291  const MachineOperand *Offset1Imm =
292  getNamedOperand(LdSt, AMDGPU::OpName::offset1);
293 
294  uint8_t Offset0 = Offset0Imm->getImm();
295  uint8_t Offset1 = Offset1Imm->getImm();
296 
297  if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
298  // Each of these offsets is in element sized units, so we need to convert
299  // to bytes of the individual reads.
300 
301  unsigned EltSize;
302  if (LdSt.mayLoad())
303  EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16;
304  else {
305  assert(LdSt.mayStore());
306  int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
307  EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8;
308  }
309 
310  if (isStride64(Opc))
311  EltSize *= 64;
312 
313  BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
314  Offset = EltSize * Offset0;
315  assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
316  "operands of type register.");
317  return true;
318  }
319 
320  return false;
321  }
322 
323  if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
324  const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset);
325  if (SOffset && SOffset->isReg())
326  return false;
327 
328  MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
329  if (!AddrReg)
330  return false;
331 
332  const MachineOperand *OffsetImm =
333  getNamedOperand(LdSt, AMDGPU::OpName::offset);
334  BaseOp = AddrReg;
335  Offset = OffsetImm->getImm();
336 
337  if (SOffset) // soffset can be an inline immediate.
338  Offset += SOffset->getImm();
339 
340  assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
341  "operands of type register.");
342  return true;
343  }
344 
345  if (isSMRD(LdSt)) {
346  const MachineOperand *OffsetImm =
347  getNamedOperand(LdSt, AMDGPU::OpName::offset);
348  if (!OffsetImm)
349  return false;
350 
351  MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase);
352  BaseOp = SBaseReg;
353  Offset = OffsetImm->getImm();
354  assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
355  "operands of type register.");
356  return true;
357  }
358 
359  if (isFLAT(LdSt)) {
360  MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
361  if (VAddr) {
362  // Can't analyze 2 offsets.
363  if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
364  return false;
365 
366  BaseOp = VAddr;
367  } else {
368  // scratch instructions have either vaddr or saddr.
369  BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr);
370  }
371 
372  Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
373  assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
374  "operands of type register.");
375  return true;
376  }
377 
378  return false;
379 }
380 
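// Two memory operations are treated as sharing a base pointer when their base
// operands are identical registers, or when both have a single memory operand
// in the same address space whose underlying IR objects match (and are not
// undef).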
381 static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
382  const MachineOperand &BaseOp1,
383  const MachineInstr &MI2,
384  const MachineOperand &BaseOp2) {
385  // Support only base operands with base registers.
386  // Note: this could be extended to support FI operands.
387  if (!BaseOp1.isReg() || !BaseOp2.isReg())
388  return false;
389 
390  if (BaseOp1.isIdenticalTo(BaseOp2))
391  return true;
392 
393  if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
394  return false;
395 
396  auto MO1 = *MI1.memoperands_begin();
397  auto MO2 = *MI2.memoperands_begin();
398  if (MO1->getAddrSpace() != MO2->getAddrSpace())
399  return false;
400 
401  auto Base1 = MO1->getValue();
402  auto Base2 = MO2->getValue();
403  if (!Base1 || !Base2)
404  return false;
405  const MachineFunction &MF = *MI1.getParent()->getParent();
406  const DataLayout &DL = MF.getFunction().getParent()->getDataLayout();
407  Base1 = GetUnderlyingObject(Base1, DL);
408   Base2 = GetUnderlyingObject(Base2, DL);
409 
410  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
411  return false;
412 
413  return Base1 == Base2;
414 }
415 
416 bool SIInstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
417   MachineOperand &BaseOp2,
418  unsigned NumLoads) const {
419  MachineInstr &FirstLdSt = *BaseOp1.getParent();
420  MachineInstr &SecondLdSt = *BaseOp2.getParent();
421 
422  if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOp1, SecondLdSt, BaseOp2))
423  return false;
424 
425  const MachineOperand *FirstDst = nullptr;
426  const MachineOperand *SecondDst = nullptr;
427 
428  if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) ||
429  (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) ||
430  (isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) {
431  const unsigned MaxGlobalLoadCluster = 6;
432  if (NumLoads > MaxGlobalLoadCluster)
433  return false;
434 
435  FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata);
436  if (!FirstDst)
437  FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
438  SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata);
439  if (!SecondDst)
440  SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
441  } else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) {
442  FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst);
443  SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst);
444  } else if (isDS(FirstLdSt) && isDS(SecondLdSt)) {
445  FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
446  SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
447  }
448 
449  if (!FirstDst || !SecondDst)
450  return false;
451 
452  // Try to limit clustering based on the total number of bytes loaded
453  // rather than the number of instructions. This is done to help reduce
454  // register pressure. The method used is somewhat inexact, though,
455  // because it assumes that all loads in the cluster will load the
456  // same number of bytes as FirstLdSt.
457 
458  // The unit of this value is bytes.
459  // FIXME: This needs finer tuning.
460  unsigned LoadClusterThreshold = 16;
461 
462  const MachineRegisterInfo &MRI =
463  FirstLdSt.getParent()->getParent()->getRegInfo();
464  const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg());
465 
466  return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
467 }
468 
469 // FIXME: This behaves strangely. If, for example, you have 32 load + stores,
470 // the first 16 loads will be interleaved with the stores, and the next 16 will
471 // be clustered as expected. It should really split into 2 16 store batches.
472 //
473 // Loads are clustered until this returns false, rather than trying to schedule
474 // groups of stores. This also means we have to deal with saying different
475 // address space loads should be clustered, and ones which might cause bank
476 // conflicts.
477 //
478 // This might be deprecated so it might not be worth that much effort to fix.
479 bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
480   int64_t Offset0, int64_t Offset1,
481  unsigned NumLoads) const {
482  assert(Offset1 > Offset0 &&
483  "Second offset should be larger than first offset!");
484  // If we have less than 16 loads in a row, and the offsets are within 64
485  // bytes, then schedule together.
486 
487  // A cacheline is 64 bytes (for global memory).
488  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
489 }
490 
491 static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
492   MachineBasicBlock::iterator MI,
493   const DebugLoc &DL, unsigned DestReg,
494  unsigned SrcReg, bool KillSrc) {
495  MachineFunction *MF = MBB.getParent();
496  DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(),
497  "illegal SGPR to VGPR copy",
498  DL, DS_Error);
499  LLVMContext &C = MF->getFunction().getContext();
500  C.diagnose(IllegalCopy);
501 
502  BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg)
503  .addReg(SrcReg, getKillRegState(KillSrc));
504 }
505 
506 void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
507   MachineBasicBlock::iterator MI,
508   const DebugLoc &DL, unsigned DestReg,
509  unsigned SrcReg, bool KillSrc) const {
510  const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);
511 
512  if (RC == &AMDGPU::VGPR_32RegClass) {
513  assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
514  AMDGPU::SReg_32RegClass.contains(SrcReg));
515  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
516  .addReg(SrcReg, getKillRegState(KillSrc));
517  return;
518  }
519 
520  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
521  RC == &AMDGPU::SReg_32RegClass) {
522  if (SrcReg == AMDGPU::SCC) {
523  BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
524  .addImm(-1)
525  .addImm(0);
526  return;
527  }
528 
529  if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
530  reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
531  return;
532  }
533 
534  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
535  .addReg(SrcReg, getKillRegState(KillSrc));
536  return;
537  }
538 
539  if (RC == &AMDGPU::SReg_64RegClass) {
540  if (DestReg == AMDGPU::VCC) {
541  if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
542  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
543  .addReg(SrcReg, getKillRegState(KillSrc));
544  } else {
545  // FIXME: Hack until VReg_1 removed.
546  assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
547  BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
548  .addImm(0)
549  .addReg(SrcReg, getKillRegState(KillSrc));
550  }
551 
552  return;
553  }
554 
555  if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
556  reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
557  return;
558  }
559 
560  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
561  .addReg(SrcReg, getKillRegState(KillSrc));
562  return;
563  }
564 
565  if (DestReg == AMDGPU::SCC) {
566  assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
567  BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
568  .addReg(SrcReg, getKillRegState(KillSrc))
569  .addImm(0);
570  return;
571  }
572 
573  unsigned EltSize = 4;
574  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
575  if (RI.isSGPRClass(RC)) {
576  if (RI.getRegSizeInBits(*RC) > 32) {
577  Opcode = AMDGPU::S_MOV_B64;
578  EltSize = 8;
579  } else {
580  Opcode = AMDGPU::S_MOV_B32;
581  EltSize = 4;
582  }
583 
584  if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) {
585  reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
586  return;
587  }
588  }
589 
590  ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
591  bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
592 
593  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
594  unsigned SubIdx;
595  if (Forward)
596  SubIdx = SubIndices[Idx];
597  else
598  SubIdx = SubIndices[SubIndices.size() - Idx - 1];
599 
600  MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
601  get(Opcode), RI.getSubReg(DestReg, SubIdx));
602 
603  Builder.addReg(RI.getSubReg(SrcReg, SubIdx));
604 
605  if (Idx == 0)
606  Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
607 
608  bool UseKill = KillSrc && Idx == SubIndices.size() - 1;
609  Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
610  }
611 }
612 
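// commuteOpcode maps between an opcode and its _REV (commuted) form. It
// returns -1 when the mapped opcode does not exist on the current subtarget,
// and the input opcode unchanged when there is no commuted form at all.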
613 int SIInstrInfo::commuteOpcode(unsigned Opcode) const {
614  int NewOpc;
615 
616  // Try to map original to commuted opcode
617  NewOpc = AMDGPU::getCommuteRev(Opcode);
618  if (NewOpc != -1)
619  // Check if the commuted (REV) opcode exists on the target.
620  return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
621 
622  // Try to map commuted to original opcode
623  NewOpc = AMDGPU::getCommuteOrig(Opcode);
624  if (NewOpc != -1)
625  // Check if the original (non-REV) opcode exists on the target.
626  return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
627 
628  return Opcode;
629 }
630 
631 void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB,
632   MachineBasicBlock::iterator MI,
633   const DebugLoc &DL, unsigned DestReg,
634   int64_t Value) const {
635   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
636   const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg);
637  if (RegClass == &AMDGPU::SReg_32RegClass ||
638  RegClass == &AMDGPU::SGPR_32RegClass ||
639  RegClass == &AMDGPU::SReg_32_XM0RegClass ||
640  RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {
641  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
642  .addImm(Value);
643  return;
644  }
645 
646  if (RegClass == &AMDGPU::SReg_64RegClass ||
647  RegClass == &AMDGPU::SGPR_64RegClass ||
648  RegClass == &AMDGPU::SReg_64_XEXECRegClass) {
649  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
650  .addImm(Value);
651  return;
652  }
653 
654  if (RegClass == &AMDGPU::VGPR_32RegClass) {
655  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
656  .addImm(Value);
657  return;
658  }
659  if (RegClass == &AMDGPU::VReg_64RegClass) {
660  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg)
661  .addImm(Value);
662  return;
663  }
664 
665  unsigned EltSize = 4;
666  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
667  if (RI.isSGPRClass(RegClass)) {
668  if (RI.getRegSizeInBits(*RegClass) > 32) {
669  Opcode = AMDGPU::S_MOV_B64;
670  EltSize = 8;
671  } else {
672  Opcode = AMDGPU::S_MOV_B32;
673  EltSize = 4;
674  }
675  }
676 
677  ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RegClass, EltSize);
678  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
679  int64_t IdxValue = Idx == 0 ? Value : 0;
680 
681  MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
682  get(Opcode), RI.getSubReg(DestReg, Idx));
683  Builder.addImm(IdxValue);
684  }
685 }
686 
687 const TargetRegisterClass *
688 SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const {
689   return &AMDGPU::VGPR_32RegClass;
690 }
691 
692 void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
693   MachineBasicBlock::iterator I,
694   const DebugLoc &DL, unsigned DstReg,
695   ArrayRef<MachineOperand> Cond,
696   unsigned TrueReg,
697   unsigned FalseReg) const {
698   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
699   assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
700  "Not a VGPR32 reg");
701 
702  if (Cond.size() == 1) {
703  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
704  BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
705  .add(Cond[0]);
706  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
707  .addReg(FalseReg)
708  .addReg(TrueReg)
709  .addReg(SReg);
710  } else if (Cond.size() == 2) {
711  assert(Cond[0].isImm() && "Cond[0] is not an immediate");
712  switch (Cond[0].getImm()) {
713  case SIInstrInfo::SCC_TRUE: {
714  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
715  BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
716  .addImm(-1)
717  .addImm(0);
718  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
719  .addReg(FalseReg)
720  .addReg(TrueReg)
721  .addReg(SReg);
722  break;
723  }
724  case SIInstrInfo::SCC_FALSE: {
725  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
726  BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
727  .addImm(0)
728  .addImm(-1);
729  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
730  .addReg(FalseReg)
731  .addReg(TrueReg)
732  .addReg(SReg);
733  break;
734  }
735  case SIInstrInfo::VCCNZ: {
736  MachineOperand RegOp = Cond[1];
737  RegOp.setImplicit(false);
738  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
739  BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
740  .add(RegOp);
741  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
742  .addReg(FalseReg)
743  .addReg(TrueReg)
744  .addReg(SReg);
745  break;
746  }
747  case SIInstrInfo::VCCZ: {
748  MachineOperand RegOp = Cond[1];
749  RegOp.setImplicit(false);
750  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
751  BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
752  .add(RegOp);
753  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
754  .addReg(TrueReg)
755  .addReg(FalseReg)
756  .addReg(SReg);
757  break;
758  }
759  case SIInstrInfo::EXECNZ: {
760  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
761  unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
762  BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
763  .addImm(0);
764  BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
765  .addImm(-1)
766  .addImm(0);
767  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
768  .addReg(FalseReg)
769  .addReg(TrueReg)
770  .addReg(SReg);
771  break;
772  }
773  case SIInstrInfo::EXECZ: {
774  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
775  unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
776  BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
777  .addImm(0);
778  BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
779  .addImm(0)
780  .addImm(-1);
781  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
782  .addReg(FalseReg)
783  .addReg(TrueReg)
784  .addReg(SReg);
785  llvm_unreachable("Unhandled branch predicate EXECZ");
786  break;
787  }
788  default:
789  llvm_unreachable("invalid branch predicate");
790  }
791  } else {
792  llvm_unreachable("Can only handle Cond size 1 or 2");
793  }
794 }
795 
796 unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
797   MachineBasicBlock::iterator I,
798   const DebugLoc &DL,
799   unsigned SrcReg, int Value) const {
800   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
801   unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
802  BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg)
803  .addImm(Value)
804  .addReg(SrcReg);
805 
806  return Reg;
807 }
808 
809 unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
810   MachineBasicBlock::iterator I,
811   const DebugLoc &DL,
812   unsigned SrcReg, int Value) const {
813   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
814   unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
815  BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg)
816  .addImm(Value)
817  .addReg(SrcReg);
818 
819  return Reg;
820 }
821 
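// getMovOpcode picks the cheapest full-register move for a destination class:
// s_mov_b32/b64 for SGPRs, v_mov_b32 or the 64-bit pseudo for VGPRs, and a
// generic COPY for anything wider.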
822 unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
823 
824  if (RI.getRegSizeInBits(*DstRC) == 32) {
825  return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
826  } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
827  return AMDGPU::S_MOV_B64;
828  } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
829  return AMDGPU::V_MOV_B64_PSEUDO;
830  }
831  return AMDGPU::COPY;
832 }
833 
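// The spill pseudo is selected purely by the spill size in bytes; the pseudos
// are expanded into real memory accesses (or VGPR-lane writes for SGPR
// spills) later in the backend.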
834 static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
835  switch (Size) {
836  case 4:
837  return AMDGPU::SI_SPILL_S32_SAVE;
838  case 8:
839  return AMDGPU::SI_SPILL_S64_SAVE;
840  case 16:
841  return AMDGPU::SI_SPILL_S128_SAVE;
842  case 32:
843  return AMDGPU::SI_SPILL_S256_SAVE;
844  case 64:
845  return AMDGPU::SI_SPILL_S512_SAVE;
846  default:
847  llvm_unreachable("unknown register size");
848  }
849 }
850 
851 static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
852  switch (Size) {
853  case 4:
854  return AMDGPU::SI_SPILL_V32_SAVE;
855  case 8:
856  return AMDGPU::SI_SPILL_V64_SAVE;
857  case 12:
858  return AMDGPU::SI_SPILL_V96_SAVE;
859  case 16:
860  return AMDGPU::SI_SPILL_V128_SAVE;
861  case 32:
862  return AMDGPU::SI_SPILL_V256_SAVE;
863  case 64:
864  return AMDGPU::SI_SPILL_V512_SAVE;
865  default:
866  llvm_unreachable("unknown register size");
867  }
868 }
869 
870 void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
871   MachineBasicBlock::iterator MI,
872   unsigned SrcReg, bool isKill,
873  int FrameIndex,
874  const TargetRegisterClass *RC,
875  const TargetRegisterInfo *TRI) const {
876  MachineFunction *MF = MBB.getParent();
877   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
878   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
879  const DebugLoc &DL = MBB.findDebugLoc(MI);
880 
881  unsigned Size = FrameInfo.getObjectSize(FrameIndex);
882  unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
883  MachinePointerInfo PtrInfo
884  = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
885  MachineMemOperand *MMO
886     = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
887       Size, Align);
888  unsigned SpillSize = TRI->getSpillSize(*RC);
889 
890  if (RI.isSGPRClass(RC)) {
891  MFI->setHasSpilledSGPRs();
892 
893  // We are only allowed to create one new instruction when spilling
894  // registers, so we need to use pseudo instruction for spilling SGPRs.
895  const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize));
896 
897   // The SGPR spill/restore instructions only work on numbered SGPRs, so we need
898  // to make sure we are using the correct register class.
899  if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) {
900     MachineRegisterInfo &MRI = MF->getRegInfo();
901     MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
902  }
903 
904  MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc)
905  .addReg(SrcReg, getKillRegState(isKill)) // data
906  .addFrameIndex(FrameIndex) // addr
907  .addMemOperand(MMO)
908     .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
909     .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
910  // Add the scratch resource registers as implicit uses because we may end up
911  // needing them, and need to ensure that the reserved registers are
912  // correctly handled.
913 
914  FrameInfo.setStackID(FrameIndex, SIStackID::SGPR_SPILL);
915  if (ST.hasScalarStores()) {
916  // m0 is used for offset to scalar stores if used to spill.
917  Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
918  }
919 
920  return;
921  }
922 
923  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
924 
925  unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize);
926  MFI->setHasSpilledVGPRs();
927  BuildMI(MBB, MI, DL, get(Opcode))
928  .addReg(SrcReg, getKillRegState(isKill)) // data
929  .addFrameIndex(FrameIndex) // addr
930  .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
931  .addReg(MFI->getFrameOffsetReg()) // scratch_offset
932  .addImm(0) // offset
933  .addMemOperand(MMO);
934 }
935 
936 static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
937  switch (Size) {
938  case 4:
939  return AMDGPU::SI_SPILL_S32_RESTORE;
940  case 8:
941  return AMDGPU::SI_SPILL_S64_RESTORE;
942  case 16:
943  return AMDGPU::SI_SPILL_S128_RESTORE;
944  case 32:
945  return AMDGPU::SI_SPILL_S256_RESTORE;
946  case 64:
947  return AMDGPU::SI_SPILL_S512_RESTORE;
948  default:
949  llvm_unreachable("unknown register size");
950  }
951 }
952 
953 static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
954  switch (Size) {
955  case 4:
956  return AMDGPU::SI_SPILL_V32_RESTORE;
957  case 8:
958  return AMDGPU::SI_SPILL_V64_RESTORE;
959  case 12:
960  return AMDGPU::SI_SPILL_V96_RESTORE;
961  case 16:
962  return AMDGPU::SI_SPILL_V128_RESTORE;
963  case 32:
964  return AMDGPU::SI_SPILL_V256_RESTORE;
965  case 64:
966  return AMDGPU::SI_SPILL_V512_RESTORE;
967  default:
968  llvm_unreachable("unknown register size");
969  }
970 }
971 
972 void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
973   MachineBasicBlock::iterator MI,
974   unsigned DestReg, int FrameIndex,
975  const TargetRegisterClass *RC,
976  const TargetRegisterInfo *TRI) const {
977  MachineFunction *MF = MBB.getParent();
978   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
979   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
980  const DebugLoc &DL = MBB.findDebugLoc(MI);
981  unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
982  unsigned Size = FrameInfo.getObjectSize(FrameIndex);
983  unsigned SpillSize = TRI->getSpillSize(*RC);
984 
985  MachinePointerInfo PtrInfo
986  = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
987 
988   MachineMemOperand *MMO = MF->getMachineMemOperand(
989     PtrInfo, MachineMemOperand::MOLoad, Size, Align);
990 
991  if (RI.isSGPRClass(RC)) {
992  MFI->setHasSpilledSGPRs();
993 
994  // FIXME: Maybe this should not include a memoperand because it will be
995  // lowered to non-memory instructions.
996  const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize));
997  if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) {
998     MachineRegisterInfo &MRI = MF->getRegInfo();
999     MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
1000  }
1001 
1002  FrameInfo.setStackID(FrameIndex, SIStackID::SGPR_SPILL);
1003  MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
1004  .addFrameIndex(FrameIndex) // addr
1005  .addMemOperand(MMO)
1006     .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
1007     .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
1008 
1009  if (ST.hasScalarStores()) {
1010  // m0 is used for offset to scalar stores if used to spill.
1011  Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
1012  }
1013 
1014  return;
1015  }
1016 
1017  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
1018 
1019  unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
1020  BuildMI(MBB, MI, DL, get(Opcode), DestReg)
1021  .addFrameIndex(FrameIndex) // vaddr
1022  .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
1023  .addReg(MFI->getFrameOffsetReg()) // scratch_offset
1024  .addImm(0) // offset
1025  .addMemOperand(MMO);
1026 }
1027 
1028 /// \param @Offset Offset in bytes of the FrameIndex being spilled
1029 unsigned SIInstrInfo::calculateLDSSpillAddress(
1030   MachineBasicBlock &MBB, MachineInstr &MI, RegScavenger *RS, unsigned TmpReg,
1031  unsigned FrameOffset, unsigned Size) const {
1032  MachineFunction *MF = MBB.getParent();
1033   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1034   const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
1035  const DebugLoc &DL = MBB.findDebugLoc(MI);
1036  unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
1037  unsigned WavefrontSize = ST.getWavefrontSize();
1038 
1039  unsigned TIDReg = MFI->getTIDReg();
1040  if (!MFI->hasCalculatedTID()) {
1041  MachineBasicBlock &Entry = MBB.getParent()->front();
1042  MachineBasicBlock::iterator Insert = Entry.front();
1043  const DebugLoc &DL = Insert->getDebugLoc();
1044 
1045  TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass,
1046  *MF);
1047  if (TIDReg == AMDGPU::NoRegister)
1048  return TIDReg;
1049 
1050     if (!AMDGPU::isShader(MF->getFunction().getCallingConv()) &&
1051         WorkGroupSize > WavefrontSize) {
1052       unsigned TIDIGXReg
1053         = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
1054       unsigned TIDIGYReg
1055         = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
1056       unsigned TIDIGZReg
1057         = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
1058       unsigned InputPtrReg =
1059           MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
1060  for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
1061  if (!Entry.isLiveIn(Reg))
1062  Entry.addLiveIn(Reg);
1063  }
1064 
1065  RS->enterBasicBlock(Entry);
1066  // FIXME: Can we scavenge an SReg_64 and access the subregs?
1067  unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
1068  unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
1069  BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
1070  .addReg(InputPtrReg)
1072  BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
1073  .addReg(InputPtrReg)
1075 
1076  // NGROUPS.X * NGROUPS.Y
1077  BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
1078  .addReg(STmp1)
1079  .addReg(STmp0);
1080  // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
1081  BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
1082  .addReg(STmp1)
1083  .addReg(TIDIGXReg);
1084   // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
1085  BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
1086  .addReg(STmp0)
1087  .addReg(TIDIGYReg)
1088  .addReg(TIDReg);
1089   // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
1090  getAddNoCarry(Entry, Insert, DL, TIDReg)
1091  .addReg(TIDReg)
1092  .addReg(TIDIGZReg);
1093  } else {
1094  // Get the wave id
1095  BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
1096  TIDReg)
1097  .addImm(-1)
1098  .addImm(0);
1099 
1100  BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
1101  TIDReg)
1102  .addImm(-1)
1103  .addReg(TIDReg);
1104  }
1105 
1106  BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
1107  TIDReg)
1108  .addImm(2)
1109  .addReg(TIDReg);
1110  MFI->setTIDReg(TIDReg);
1111  }
1112 
1113  // Add FrameIndex to LDS offset
1114  unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
1115  getAddNoCarry(MBB, MI, DL, TmpReg)
1116  .addImm(LDSOffset)
1117  .addReg(TIDReg);
1118 
1119  return TmpReg;
1120 }
1121 
1124  int Count) const {
1125  DebugLoc DL = MBB.findDebugLoc(MI);
1126  while (Count > 0) {
1127  int Arg;
1128  if (Count >= 8)
1129  Arg = 7;
1130  else
1131  Arg = Count - 1;
1132  Count -= 8;
1133  BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
1134  .addImm(Arg);
1135  }
1136 }
1137 
1138 void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
1139   MachineBasicBlock::iterator MI) const {
1140   insertWaitStates(MBB, MI, 1);
1141 }
1142 
1143 void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
1144   auto MF = MBB.getParent();
1145   const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
1146 
1147  assert(Info->isEntryFunction());
1148 
1149  if (MBB.succ_empty()) {
1150  bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
1151  if (HasNoTerminator)
1152  BuildMI(MBB, MBB.end(), DebugLoc(),
1153  get(Info->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN_TO_EPILOG));
1154  }
1155 }
1156 
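// S_NOP N executes N + 1 wait states; every other instruction is
// conservatively counted as a single wait state here.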
1157 unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) {
1158   switch (MI.getOpcode()) {
1159  default: return 1; // FIXME: Do wait states equal cycles?
1160 
1161  case AMDGPU::S_NOP:
1162  return MI.getOperand(0).getImm() + 1;
1163  }
1164 }
1165 
1166 bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1167   MachineBasicBlock &MBB = *MI.getParent();
1168  DebugLoc DL = MBB.findDebugLoc(MI);
1169  switch (MI.getOpcode()) {
1170  default: return TargetInstrInfo::expandPostRAPseudo(MI);
1171  case AMDGPU::S_MOV_B64_term:
1172  // This is only a terminator to get the correct spill code placement during
1173  // register allocation.
1174  MI.setDesc(get(AMDGPU::S_MOV_B64));
1175  break;
1176 
1177  case AMDGPU::S_XOR_B64_term:
1178  // This is only a terminator to get the correct spill code placement during
1179  // register allocation.
1180  MI.setDesc(get(AMDGPU::S_XOR_B64));
1181  break;
1182 
1183  case AMDGPU::S_ANDN2_B64_term:
1184  // This is only a terminator to get the correct spill code placement during
1185  // register allocation.
1186  MI.setDesc(get(AMDGPU::S_ANDN2_B64));
1187  break;
1188 
1189  case AMDGPU::V_MOV_B64_PSEUDO: {
1190  unsigned Dst = MI.getOperand(0).getReg();
1191  unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
1192  unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
1193 
1194  const MachineOperand &SrcOp = MI.getOperand(1);
1195  // FIXME: Will this work for 64-bit floating point immediates?
1196  assert(!SrcOp.isFPImm());
1197  if (SrcOp.isImm()) {
1198  APInt Imm(64, SrcOp.getImm());
1199  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
1200  .addImm(Imm.getLoBits(32).getZExtValue())
1201  .addReg(Dst, RegState::Implicit | RegState::Define);
1202  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
1203  .addImm(Imm.getHiBits(32).getZExtValue())
1204  .addReg(Dst, RegState::Implicit | RegState::Define);
1205  } else {
1206  assert(SrcOp.isReg());
1207  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
1208         .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
1209         .addReg(Dst, RegState::Implicit | RegState::Define);
1210       BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
1211         .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
1212         .addReg(Dst, RegState::Implicit | RegState::Define);
1213     }
1214  MI.eraseFromParent();
1215  break;
1216  }
1217  case AMDGPU::V_SET_INACTIVE_B32: {
1218  BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1219  .addReg(AMDGPU::EXEC);
1220  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
1221  .add(MI.getOperand(2));
1222  BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1223  .addReg(AMDGPU::EXEC);
1224  MI.eraseFromParent();
1225  break;
1226  }
1227  case AMDGPU::V_SET_INACTIVE_B64: {
1228  BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1229  .addReg(AMDGPU::EXEC);
1230  MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
1231  MI.getOperand(0).getReg())
1232  .add(MI.getOperand(2));
1233  expandPostRAPseudo(*Copy);
1234  BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1235  .addReg(AMDGPU::EXEC);
1236  MI.eraseFromParent();
1237  break;
1238  }
1239  case AMDGPU::V_MOVRELD_B32_V1:
1240  case AMDGPU::V_MOVRELD_B32_V2:
1241  case AMDGPU::V_MOVRELD_B32_V4:
1242  case AMDGPU::V_MOVRELD_B32_V8:
1243  case AMDGPU::V_MOVRELD_B32_V16: {
1244  const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
1245  unsigned VecReg = MI.getOperand(0).getReg();
1246  bool IsUndef = MI.getOperand(1).isUndef();
1247  unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
1248  assert(VecReg == MI.getOperand(1).getReg());
1249 
1250  MachineInstr *MovRel =
1251  BuildMI(MBB, MI, DL, MovRelDesc)
1252  .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
1253  .add(MI.getOperand(2))
1254  .addReg(VecReg, RegState::ImplicitDefine)
1255  .addReg(VecReg,
1256  RegState::Implicit | (IsUndef ? RegState::Undef : 0));
1257 
1258  const int ImpDefIdx =
1259  MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
1260  const int ImpUseIdx = ImpDefIdx + 1;
1261  MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
1262 
1263  MI.eraseFromParent();
1264  break;
1265  }
1266  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
1267  MachineFunction &MF = *MBB.getParent();
1268  unsigned Reg = MI.getOperand(0).getReg();
1269  unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
1270  unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
1271 
1272  // Create a bundle so these instructions won't be re-ordered by the
1273  // post-RA scheduler.
1274  MIBundleBuilder Bundler(MBB, MI);
1275  Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));
1276 
1277  // Add 32-bit offset from this instruction to the start of the
1278  // constant data.
1279  Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
1280  .addReg(RegLo)
1281  .add(MI.getOperand(1)));
1282 
1283  MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
1284       .addReg(RegHi);
1285     if (MI.getOperand(2).getTargetFlags() == SIInstrInfo::MO_NONE)
1286       MIB.addImm(0);
1287  else
1288  MIB.add(MI.getOperand(2));
1289 
1290  Bundler.append(MIB);
1291  finalizeBundle(MBB, Bundler.begin());
1292 
1293  MI.eraseFromParent();
1294  break;
1295  }
1296  case AMDGPU::EXIT_WWM: {
1297  // This only gets its own opcode so that SIFixWWMLiveness can tell when WWM
1298  // is exited.
1299  MI.setDesc(get(AMDGPU::S_MOV_B64));
1300  break;
1301  }
1302  case TargetOpcode::BUNDLE: {
1303  if (!MI.mayLoad())
1304  return false;
1305 
1306  // If it is a load it must be a memory clause
1307     for (MachineBasicBlock::instr_iterator I = MI.getIterator();
1308          I->isBundledWithSucc(); ++I) {
1309  I->unbundleFromSucc();
1310  for (MachineOperand &MO : I->operands())
1311  if (MO.isReg())
1312  MO.setIsInternalRead(false);
1313  }
1314 
1315  MI.eraseFromParent();
1316  break;
1317  }
1318  }
1319  return true;
1320 }
1321 
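// Commuting an instruction also has to exchange the src0/src1 source-modifier
// immediates (neg, abs, ...) that travel with the operands.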
1322 bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
1323   MachineOperand &Src0,
1324  unsigned Src0OpName,
1325  MachineOperand &Src1,
1326  unsigned Src1OpName) const {
1327  MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName);
1328  if (!Src0Mods)
1329  return false;
1330 
1331  MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName);
1332  assert(Src1Mods &&
1333  "All commutable instructions have both src0 and src1 modifiers");
1334 
1335  int Src0ModsVal = Src0Mods->getImm();
1336  int Src1ModsVal = Src1Mods->getImm();
1337 
1338  Src1Mods->setImm(Src0ModsVal);
1339  Src0Mods->setImm(Src1ModsVal);
1340  return true;
1341 }
1342 
1343 static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
1344   MachineOperand &RegOp,
1345  MachineOperand &NonRegOp) {
1346  unsigned Reg = RegOp.getReg();
1347  unsigned SubReg = RegOp.getSubReg();
1348  bool IsKill = RegOp.isKill();
1349  bool IsDead = RegOp.isDead();
1350  bool IsUndef = RegOp.isUndef();
1351  bool IsDebug = RegOp.isDebug();
1352 
1353  if (NonRegOp.isImm())
1354  RegOp.ChangeToImmediate(NonRegOp.getImm());
1355  else if (NonRegOp.isFI())
1356  RegOp.ChangeToFrameIndex(NonRegOp.getIndex());
1357  else
1358  return nullptr;
1359 
1360  NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug);
1361  NonRegOp.setSubReg(SubReg);
1362 
1363  return &MI;
1364 }
1365 
1366 MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
1367   unsigned Src0Idx,
1368  unsigned Src1Idx) const {
1369  assert(!NewMI && "this should never be used");
1370 
1371  unsigned Opc = MI.getOpcode();
1372  int CommutedOpcode = commuteOpcode(Opc);
1373  if (CommutedOpcode == -1)
1374  return nullptr;
1375 
1376  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
1377  static_cast<int>(Src0Idx) &&
1378  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
1379  static_cast<int>(Src1Idx) &&
1380  "inconsistency with findCommutedOpIndices");
1381 
1382  MachineOperand &Src0 = MI.getOperand(Src0Idx);
1383  MachineOperand &Src1 = MI.getOperand(Src1Idx);
1384 
1385  MachineInstr *CommutedMI = nullptr;
1386  if (Src0.isReg() && Src1.isReg()) {
1387  if (isOperandLegal(MI, Src1Idx, &Src0)) {
1388  // Be sure to copy the source modifiers to the right place.
1389  CommutedMI
1390  = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
1391  }
1392 
1393  } else if (Src0.isReg() && !Src1.isReg()) {
1394  // src0 should always be able to support any operand type, so no need to
1395  // check operand legality.
1396  CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
1397  } else if (!Src0.isReg() && Src1.isReg()) {
1398  if (isOperandLegal(MI, Src1Idx, &Src0))
1399  CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
1400  } else {
1401  // FIXME: Found two non registers to commute. This does happen.
1402  return nullptr;
1403  }
1404 
1405  if (CommutedMI) {
1406  swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
1407  Src1, AMDGPU::OpName::src1_modifiers);
1408 
1409  CommutedMI->setDesc(get(CommutedOpcode));
1410  }
1411 
1412  return CommutedMI;
1413 }
1414 
1415 // This needs to be implemented because the source modifiers may be inserted
1416 // between the true commutable operands, and the base
1417 // TargetInstrInfo::commuteInstruction uses it.
1418 bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
1419   unsigned &SrcOpIdx1) const {
1420  return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1);
1421 }
1422 
1423 bool SIInstrInfo::findCommutedOpIndices(MCInstrDesc Desc, unsigned &SrcOpIdx0,
1424  unsigned &SrcOpIdx1) const {
1425  if (!Desc.isCommutable())
1426  return false;
1427 
1428  unsigned Opc = Desc.getOpcode();
1429  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
1430  if (Src0Idx == -1)
1431  return false;
1432 
1433  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
1434  if (Src1Idx == -1)
1435  return false;
1436 
1437  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
1438 }
1439 
1440 bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
1441  int64_t BrOffset) const {
1442  // BranchRelaxation should never have to check s_setpc_b64 because its dest
1443  // block is unanalyzable.
1444  assert(BranchOp != AMDGPU::S_SETPC_B64);
1445 
1446  // Convert to dwords.
1447  BrOffset /= 4;
1448 
1449  // The branch instructions do PC += signext(SIMM16 * 4) + 4, so the offset is
1450  // from the next instruction.
1451  BrOffset -= 1;
1452 
1453  return isIntN(BranchOffsetBits, BrOffset);
1454 }
1455 
1456 MachineBasicBlock *SIInstrInfo::getBranchDestBlock(
1457   const MachineInstr &MI) const {
1458  if (MI.getOpcode() == AMDGPU::S_SETPC_B64) {
1459  // This would be a difficult analysis to perform, but can always be legal so
1460  // there's no need to analyze it.
1461  return nullptr;
1462  }
1463 
1464  return MI.getOperand(0).getMBB();
1465 }
1466 
1467 unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
1468   MachineBasicBlock &DestBB,
1469  const DebugLoc &DL,
1470  int64_t BrOffset,
1471  RegScavenger *RS) const {
1472  assert(RS && "RegScavenger required for long branching");
1473  assert(MBB.empty() &&
1474  "new block should be inserted for expanding unconditional branch");
1475  assert(MBB.pred_size() == 1);
1476 
1477  MachineFunction *MF = MBB.getParent();
1478  MachineRegisterInfo &MRI = MF->getRegInfo();
1479 
1480  // FIXME: Virtual register workaround for RegScavenger not working with empty
1481  // blocks.
1482  unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
1483 
1484  auto I = MBB.end();
1485 
1486  // We need to compute the offset relative to the instruction immediately after
1487  // s_getpc_b64. Insert pc arithmetic code before last terminator.
1488  MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg);
1489 
1490  // TODO: Handle > 32-bit block address.
1491  if (BrOffset >= 0) {
1492  BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32))
1493  .addReg(PCReg, RegState::Define, AMDGPU::sub0)
1494       .addReg(PCReg, 0, AMDGPU::sub0)
1495       .addMBB(&DestBB);
1496     BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
1497  .addReg(PCReg, RegState::Define, AMDGPU::sub1)
1498  .addReg(PCReg, 0, AMDGPU::sub1)
1499  .addImm(0);
1500  } else {
1501  // Backwards branch.
1502  BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32))
1503  .addReg(PCReg, RegState::Define, AMDGPU::sub0)
1504       .addReg(PCReg, 0, AMDGPU::sub0)
1505       .addMBB(&DestBB);
1506     BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32))
1507  .addReg(PCReg, RegState::Define, AMDGPU::sub1)
1508  .addReg(PCReg, 0, AMDGPU::sub1)
1509  .addImm(0);
1510  }
1511 
1512  // Insert the indirect branch after the other terminator.
1513  BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
1514  .addReg(PCReg);
1515 
1516  // FIXME: If spilling is necessary, this will fail because this scavenger has
1517  // no emergency stack slots. It is non-trivial to spill in this situation,
1518  // because the restore code needs to be specially placed after the
1519  // jump. BranchRelaxation then needs to be made aware of the newly inserted
1520  // block.
1521  //
1522  // If a spill is needed for the pc register pair, we need to insert a spill
1523  // restore block right before the destination block, and insert a short branch
1524  // into the old destination block's fallthrough predecessor.
1525  // e.g.:
1526  //
1527  // s_cbranch_scc0 skip_long_branch:
1528  //
1529  // long_branch_bb:
1530  // spill s[8:9]
1531  // s_getpc_b64 s[8:9]
1532  // s_add_u32 s8, s8, restore_bb
1533  // s_addc_u32 s9, s9, 0
1534  // s_setpc_b64 s[8:9]
1535  //
1536  // skip_long_branch:
1537  // foo;
1538  //
1539  // .....
1540  //
1541  // dest_bb_fallthrough_predecessor:
1542  // bar;
1543  // s_branch dest_bb
1544  //
1545  // restore_bb:
1546  // restore s[8:9]
1547  // fallthrough dest_bb
1548   //
1549  // dest_bb:
1550  // buzz;
1551 
1552  RS->enterBasicBlockEnd(MBB);
1553  unsigned Scav = RS->scavengeRegisterBackwards(
1554  AMDGPU::SReg_64RegClass,
1555  MachineBasicBlock::iterator(GetPC), false, 0);
1556  MRI.replaceRegWith(PCReg, Scav);
1557  MRI.clearVirtRegs();
1558  RS->setRegUsed(Scav);
1559 
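  // 4 + 8 + 4 + 4 bytes: s_getpc_b64, s_add_u32 (carrying a 32-bit literal),
  // s_addc_u32 and s_setpc_b64, i.e. the size of the sequence emitted above.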
1560  return 4 + 8 + 4 + 4;
1561 }
1562 
1563 unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
1564  switch (Cond) {
1565  case SIInstrInfo::SCC_TRUE:
1566  return AMDGPU::S_CBRANCH_SCC1;
1567  case SIInstrInfo::SCC_FALSE:
1568  return AMDGPU::S_CBRANCH_SCC0;
1569  case SIInstrInfo::VCCNZ:
1570  return AMDGPU::S_CBRANCH_VCCNZ;
1571  case SIInstrInfo::VCCZ:
1572  return AMDGPU::S_CBRANCH_VCCZ;
1573  case SIInstrInfo::EXECNZ:
1574  return AMDGPU::S_CBRANCH_EXECNZ;
1575  case SIInstrInfo::EXECZ:
1576  return AMDGPU::S_CBRANCH_EXECZ;
1577  default:
1578  llvm_unreachable("invalid branch predicate");
1579  }
1580 }
1581 
1582 SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
1583  switch (Opcode) {
1584  case AMDGPU::S_CBRANCH_SCC0:
1585  return SCC_FALSE;
1586  case AMDGPU::S_CBRANCH_SCC1:
1587  return SCC_TRUE;
1588  case AMDGPU::S_CBRANCH_VCCNZ:
1589  return VCCNZ;
1590  case AMDGPU::S_CBRANCH_VCCZ:
1591  return VCCZ;
1592  case AMDGPU::S_CBRANCH_EXECNZ:
1593  return EXECNZ;
1594  case AMDGPU::S_CBRANCH_EXECZ:
1595  return EXECZ;
1596  default:
1597  return INVALID_BR;
1598  }
1599 }
1600 
1601 bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB,
1602   MachineBasicBlock::iterator I,
1603   MachineBasicBlock *&TBB,
1604   MachineBasicBlock *&FBB,
1605   SmallVectorImpl<MachineOperand> &Cond,
1606   bool AllowModify) const {
1607  if (I->getOpcode() == AMDGPU::S_BRANCH) {
1608  // Unconditional Branch
1609  TBB = I->getOperand(0).getMBB();
1610  return false;
1611  }
1612 
1613  MachineBasicBlock *CondBB = nullptr;
1614 
1615  if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
1616  CondBB = I->getOperand(1).getMBB();
1617  Cond.push_back(I->getOperand(0));
1618  } else {
1619  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
1620  if (Pred == INVALID_BR)
1621  return true;
1622 
1623  CondBB = I->getOperand(0).getMBB();
1624     Cond.push_back(MachineOperand::CreateImm(Pred));
1625     Cond.push_back(I->getOperand(1)); // Save the branch register.
1626  }
1627  ++I;
1628 
1629  if (I == MBB.end()) {
1630  // Conditional branch followed by fall-through.
1631  TBB = CondBB;
1632  return false;
1633  }
1634 
1635  if (I->getOpcode() == AMDGPU::S_BRANCH) {
1636  TBB = CondBB;
1637  FBB = I->getOperand(0).getMBB();
1638  return false;
1639  }
1640 
1641  return true;
1642 }
1643 
1644 bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
1645   MachineBasicBlock *&FBB,
1646   SmallVectorImpl<MachineOperand> &Cond,
1647   bool AllowModify) const {
1648   MachineBasicBlock::iterator I = MBB.getFirstTerminator();
1649   auto E = MBB.end();
1650  if (I == E)
1651  return false;
1652 
1653   // Skip over the instructions that are artificial terminators for special
1654   // exec management.
1655  while (I != E && !I->isBranch() && !I->isReturn() &&
1656  I->getOpcode() != AMDGPU::SI_MASK_BRANCH) {
1657  switch (I->getOpcode()) {
1658  case AMDGPU::SI_MASK_BRANCH:
1659  case AMDGPU::S_MOV_B64_term:
1660  case AMDGPU::S_XOR_B64_term:
1661  case AMDGPU::S_ANDN2_B64_term:
1662  break;
1663  case AMDGPU::SI_IF:
1664  case AMDGPU::SI_ELSE:
1665  case AMDGPU::SI_KILL_I1_TERMINATOR:
1666  case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
1667  // FIXME: It's messy that these need to be considered here at all.
1668  return true;
1669  default:
1670  llvm_unreachable("unexpected non-branch terminator inst");
1671  }
1672 
1673  ++I;
1674  }
1675 
1676  if (I == E)
1677  return false;
1678 
1679  if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
1680  return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
1681 
1682  ++I;
1683 
1684  // TODO: Should be able to treat as fallthrough?
1685  if (I == MBB.end())
1686  return true;
1687 
1688  if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify))
1689  return true;
1690 
1691  MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB();
1692 
1693  // Specifically handle the case where the conditional branch is to the same
1694  // destination as the mask branch. e.g.
1695  //
1696  // si_mask_branch BB8
1697  // s_cbranch_execz BB8
1698  // s_cbranch BB9
1699  //
1700  // This is required to understand divergent loops which may need the branches
1701  // to be relaxed.
1702  if (TBB != MaskBrDest || Cond.empty())
1703  return true;
1704 
1705  auto Pred = Cond[0].getImm();
1706  return (Pred != EXECZ && Pred != EXECNZ);
1707 }
1708 
1709 unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
1710   int *BytesRemoved) const {
1711   MachineBasicBlock::iterator I = MBB.getFirstTerminator();
1712 
1713  unsigned Count = 0;
1714  unsigned RemovedSize = 0;
1715  while (I != MBB.end()) {
1716  MachineBasicBlock::iterator Next = std::next(I);
1717  if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
1718  I = Next;
1719  continue;
1720  }
1721 
1722  RemovedSize += getInstSizeInBytes(*I);
1723  I->eraseFromParent();
1724  ++Count;
1725  I = Next;
1726  }
1727 
1728  if (BytesRemoved)
1729  *BytesRemoved = RemovedSize;
1730 
1731  return Count;
1732 }
1733 
1734 // Copy the flags onto the implicit condition register operand.
1735 static void preserveCondRegFlags(MachineOperand &CondReg,
1736   const MachineOperand &OrigCond) {
1737  CondReg.setIsUndef(OrigCond.isUndef());
1738  CondReg.setIsKill(OrigCond.isKill());
1739 }
1740 
1741 unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
1742   MachineBasicBlock *TBB,
1743   MachineBasicBlock *FBB,
1744   ArrayRef<MachineOperand> Cond,
1745   const DebugLoc &DL,
1746  int *BytesAdded) const {
1747  if (!FBB && Cond.empty()) {
1748  BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
1749  .addMBB(TBB);
1750  if (BytesAdded)
1751  *BytesAdded = 4;
1752  return 1;
1753  }
1754 
1755   if (Cond.size() == 1 && Cond[0].isReg()) {
1756  BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO))
1757  .add(Cond[0])
1758  .addMBB(TBB);
1759  return 1;
1760  }
1761 
1762  assert(TBB && Cond[0].isImm());
1763 
1764  unsigned Opcode
1765  = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
1766 
1767  if (!FBB) {
1768  Cond[1].isUndef();
1769  MachineInstr *CondBr =
1770  BuildMI(&MBB, DL, get(Opcode))
1771  .addMBB(TBB);
1772 
1773  // Copy the flags onto the implicit condition register operand.
1774  preserveCondRegFlags(CondBr->getOperand(1), Cond[1]);
1775 
1776  if (BytesAdded)
1777  *BytesAdded = 4;
1778  return 1;
1779  }
1780 
1781  assert(TBB && FBB);
1782 
1783  MachineInstr *CondBr =
1784  BuildMI(&MBB, DL, get(Opcode))
1785  .addMBB(TBB);
1786  BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
1787  .addMBB(FBB);
1788 
1789  MachineOperand &CondReg = CondBr->getOperand(1);
1790  CondReg.setIsUndef(Cond[1].isUndef());
1791  CondReg.setIsKill(Cond[1].isKill());
1792 
1793  if (BytesAdded)
1794  *BytesAdded = 8;
1795 
1796  return 2;
1797 }
1798 
1799 bool SIInstrInfo::reverseBranchCondition(
1800   SmallVectorImpl<MachineOperand> &Cond) const {
1801  if (Cond.size() != 2) {
1802  return true;
1803  }
1804 
1805  if (Cond[0].isImm()) {
1806  Cond[0].setImm(-Cond[0].getImm());
1807  return false;
1808  }
1809 
1810  return true;
1811 }
1812 
1813 bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
1814   ArrayRef<MachineOperand> Cond,
1815   unsigned TrueReg, unsigned FalseReg,
1816  int &CondCycles,
1817  int &TrueCycles, int &FalseCycles) const {
1818  switch (Cond[0].getImm()) {
1819  case VCCNZ:
1820  case VCCZ: {
1821  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1822  const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
1823  assert(MRI.getRegClass(FalseReg) == RC);
1824 
1825  int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
1826  CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
1827 
1828  // Limit to equal cost for branch vs. N v_cndmask_b32s.
1829  return !RI.isSGPRClass(RC) && NumInsts <= 6;
1830  }
1831  case SCC_TRUE:
1832  case SCC_FALSE: {
1833  // FIXME: We could insert for VGPRs if we could replace the original compare
1834  // with a vector one.
1835  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1836  const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
1837  assert(MRI.getRegClass(FalseReg) == RC);
1838 
1839  int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
1840 
1841  // Multiples of 8 can do s_cselect_b64
1842  if (NumInsts % 2 == 0)
1843  NumInsts /= 2;
1844 
1845  CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
1846  return RI.isSGPRClass(RC);
1847  }
1848  default:
1849  return false;
1850  }
1851 }
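The cycle estimates above are simply "one 32-bit select per dword", halved for SGPR selects on SCC because two dwords can share an s_cselect_b64. A standalone sketch of that arithmetic; the function and parameter names are assumptions made for illustration.

#include <cstdio>

// Mirrors the NumInsts computation in canInsertSelect above.
int estimateSelectInsts(unsigned RegBitWidth, bool IsSGPRSelectOnSCC) {
  int NumInsts = RegBitWidth / 32;          // one 32-bit select per dword
  if (IsSGPRSelectOnSCC && NumInsts % 2 == 0)
    NumInsts /= 2;                          // dword pairs can use s_cselect_b64
  return NumInsts;
}

int main() {
  std::printf("128-bit VGPR select: %d v_cndmask_b32\n", estimateSelectInsts(128, false));
  std::printf("128-bit SGPR select: %d s_cselect_b64\n", estimateSelectInsts(128, true));
}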
1852 
1853 void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
1854  MachineBasicBlock::iterator I, const DebugLoc &DL,
1855  unsigned DstReg, ArrayRef<MachineOperand> Cond,
1856  unsigned TrueReg, unsigned FalseReg) const {
1857  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
1858  if (Pred == VCCZ || Pred == SCC_FALSE) {
1859  Pred = static_cast<BranchPredicate>(-Pred);
1860  std::swap(TrueReg, FalseReg);
1861  }
1862 
1863  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1864  const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
1865  unsigned DstSize = RI.getRegSizeInBits(*DstRC);
1866 
1867  if (DstSize == 32) {
1868  unsigned SelOp = Pred == SCC_TRUE ?
1869  AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32;
1870 
1871  // Instruction's operands are backwards from what is expected.
1872  MachineInstr *Select =
1873  BuildMI(MBB, I, DL, get(SelOp), DstReg)
1874  .addReg(FalseReg)
1875  .addReg(TrueReg);
1876 
1877  preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1878  return;
1879  }
1880 
1881  if (DstSize == 64 && Pred == SCC_TRUE) {
1882  MachineInstr *Select =
1883  BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
1884  .addReg(FalseReg)
1885  .addReg(TrueReg);
1886 
1887  preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1888  return;
1889  }
1890 
1891  static const int16_t Sub0_15[] = {
1892  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1893  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1894  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1895  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1896  };
1897 
1898  static const int16_t Sub0_15_64[] = {
1899  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1900  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1901  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1902  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
1903  };
1904 
1905  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
1906  const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass;
1907  const int16_t *SubIndices = Sub0_15;
1908  int NElts = DstSize / 32;
1909 
1910  // 64-bit select is only available for SALU.
1911  if (Pred == SCC_TRUE) {
1912  SelOp = AMDGPU::S_CSELECT_B64;
1913  EltRC = &AMDGPU::SGPR_64RegClass;
1914  SubIndices = Sub0_15_64;
1915 
1916  assert(NElts % 2 == 0);
1917  NElts /= 2;
1918  }
1919 
1920  MachineInstrBuilder MIB = BuildMI(
1921  MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);
1922 
1923  I = MIB->getIterator();
1924 
1925  SmallVector<unsigned, 8> Regs;
1926  for (int Idx = 0; Idx != NElts; ++Idx) {
1927  unsigned DstElt = MRI.createVirtualRegister(EltRC);
1928  Regs.push_back(DstElt);
1929 
1930  unsigned SubIdx = SubIndices[Idx];
1931 
1932  MachineInstr *Select =
1933  BuildMI(MBB, I, DL, get(SelOp), DstElt)
1934  .addReg(FalseReg, 0, SubIdx)
1935  .addReg(TrueReg, 0, SubIdx);
1936  preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1937 
1938  MIB.addReg(DstElt)
1939  .addImm(SubIdx);
1940  }
1941 }
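For wide VGPR selects, the loop above emits one v_cndmask_b32 per 32-bit sub-register and stitches the pieces back together with REG_SEQUENCE. A hedged sketch of the expected expansion for a 64-bit VGPR select on VCC; the virtual register names and exact MIR spelling are illustrative:

//   %lo:vgpr_32 = V_CNDMASK_B32_e32 %false.sub0, %true.sub0, implicit $vcc
//   %hi:vgpr_32 = V_CNDMASK_B32_e32 %false.sub1, %true.sub1, implicit $vcc
//   %dst:vreg_64 = REG_SEQUENCE %lo, %subreg.sub0, %hi, %subreg.sub1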
1942 
1943 bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
1944  switch (MI.getOpcode()) {
1945  case AMDGPU::V_MOV_B32_e32:
1946  case AMDGPU::V_MOV_B32_e64:
1947  case AMDGPU::V_MOV_B64_PSEUDO: {
1948  // If there are additional implicit register operands, this may be used for
1949  // register indexing so the source register operand isn't simply copied.
1950  unsigned NumOps = MI.getDesc().getNumOperands() +
1951  MI.getDesc().getNumImplicitUses();
1952 
1953  return MI.getNumOperands() == NumOps;
1954  }
1955  case AMDGPU::S_MOV_B32:
1956  case AMDGPU::S_MOV_B64:
1957  case AMDGPU::COPY:
1958  return true;
1959  default:
1960  return false;
1961  }
1962 }
1963 
1965  unsigned Kind) const {
1966  switch(Kind) {
1977  }
1978  return AMDGPUAS::FLAT_ADDRESS;
1979 }
1980 
1981 void SIInstrInfo::removeModOperands(MachineInstr &MI) const {
1982  unsigned Opc = MI.getOpcode();
1983  int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1984  AMDGPU::OpName::src0_modifiers);
1985  int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1986  AMDGPU::OpName::src1_modifiers);
1987  int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1988  AMDGPU::OpName::src2_modifiers);
1989 
1990  MI.RemoveOperand(Src2ModIdx);
1991  MI.RemoveOperand(Src1ModIdx);
1992  MI.RemoveOperand(Src0ModIdx);
1993 }
1994 
1995 bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
1996  unsigned Reg, MachineRegisterInfo *MRI) const {
1997  if (!MRI->hasOneNonDBGUse(Reg))
1998  return false;
1999 
2000  switch (DefMI.getOpcode()) {
2001  default:
2002  return false;
2003  case AMDGPU::S_MOV_B64:
2004  // TODO: We could fold 64-bit immediates, but this gets complicated
2005  // when there are sub-registers.
2006  return false;
2007 
2008  case AMDGPU::V_MOV_B32_e32:
2009  case AMDGPU::S_MOV_B32:
2010  break;
2011  }
2012 
2013  const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
2014  assert(ImmOp);
2015  // FIXME: We could handle FrameIndex values here.
2016  if (!ImmOp->isImm())
2017  return false;
2018 
2019  unsigned Opc = UseMI.getOpcode();
2020  if (Opc == AMDGPU::COPY) {
2021  bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
2022  unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
2023  UseMI.setDesc(get(NewOpc));
2024  UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
2025  UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
2026  return true;
2027  }
2028 
2029  if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
2030  Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) {
2031  // Don't fold if we are using source or output modifiers. The new VOP2
2032  // instructions don't have them.
2033  if (hasAnyModifiersSet(UseMI))
2034  return false;
2035 
2036  // If this is a free constant, there's no reason to do this.
2037  // TODO: We could fold this here instead of letting SIFoldOperands do it
2038  // later.
2039  MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0);
2040 
2041  // Any src operand can be used for the legality check.
2042  if (isInlineConstant(UseMI, *Src0, *ImmOp))
2043  return false;
2044 
2045  bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
2046  MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
2047  MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
2048 
2049  // Multiplied part is the constant: Use v_madmk_{f16, f32}.
2050  // We should only expect these to be on src0 due to canonicalizations.
2051  if (Src0->isReg() && Src0->getReg() == Reg) {
2052  if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
2053  return false;
2054 
2055  if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
2056  return false;
2057 
2058  // We need to swap operands 0 and 1 since madmk constant is at operand 1.
2059 
2060  const int64_t Imm = ImmOp->getImm();
2061 
2062  // FIXME: This would be a lot easier if we could return a new instruction
2063  // instead of having to modify in place.
2064 
2065  // Remove these first since they are at the end.
2066  UseMI.RemoveOperand(
2067  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
2068  UseMI.RemoveOperand(
2069  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
2070 
2071  unsigned Src1Reg = Src1->getReg();
2072  unsigned Src1SubReg = Src1->getSubReg();
2073  Src0->setReg(Src1Reg);
2074  Src0->setSubReg(Src1SubReg);
2075  Src0->setIsKill(Src1->isKill());
2076 
2077  if (Opc == AMDGPU::V_MAC_F32_e64 ||
2078  Opc == AMDGPU::V_MAC_F16_e64)
2079  UseMI.untieRegOperand(
2080  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
2081 
2082  Src1->ChangeToImmediate(Imm);
2083 
2084  removeModOperands(UseMI);
2085  UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16));
2086 
2087  bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
2088  if (DeleteDef)
2089  DefMI.eraseFromParent();
2090 
2091  return true;
2092  }
2093 
2094  // Added part is the constant: Use v_madak_{f16, f32}.
2095  if (Src2->isReg() && Src2->getReg() == Reg) {
2096  // Not allowed to use constant bus for another operand.
2097  // We can however allow an inline immediate as src0.
2098  bool Src0Inlined = false;
2099  if (Src0->isReg()) {
2100  // Try to inline the constant if possible.
2101  // If the def is a move of an immediate and this is its only use,
2102  // we save a VGPR here.
2103  MachineInstr *Def = MRI->getUniqueVRegDef(Src0->getReg());
2104  if (Def && Def->isMoveImmediate() &&
2105  isInlineConstant(Def->getOperand(1)) &&
2106  MRI->hasOneUse(Src0->getReg())) {
2107  Src0->ChangeToImmediate(Def->getOperand(1).getImm());
2108  Src0Inlined = true;
2109  } else if ((RI.isPhysicalRegister(Src0->getReg()) &&
2110  RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg()))) ||
2111  (RI.isVirtualRegister(Src0->getReg()) &&
2112  RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
2113  return false;
2114  // VGPR is okay as Src0 - fallthrough
2115  }
2116 
2117  if (Src1->isReg() && !Src0Inlined ) {
2118  // We have one slot for inlinable constant so far - try to fill it
2119  MachineInstr *Def = MRI->getUniqueVRegDef(Src1->getReg());
2120  if (Def && Def->isMoveImmediate() &&
2121  isInlineConstant(Def->getOperand(1)) &&
2122  MRI->hasOneUse(Src1->getReg()) &&
2123  commuteInstruction(UseMI)) {
2124  Src0->ChangeToImmediate(Def->getOperand(1).getImm());
2125  } else if ((RI.isPhysicalRegister(Src1->getReg()) &&
2126  RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) ||
2127  (RI.isVirtualRegister(Src1->getReg()) &&
2128  RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
2129  return false;
2130  // VGPR is okay as Src1 - fallthrough
2131  }
2132 
2133  const int64_t Imm = ImmOp->getImm();
2134 
2135  // FIXME: This would be a lot easier if we could return a new instruction
2136  // instead of having to modify in place.
2137 
2138  // Remove these first since they are at the end.
2139  UseMI.RemoveOperand(
2140  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
2141  UseMI.RemoveOperand(
2142  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
2143 
2144  if (Opc == AMDGPU::V_MAC_F32_e64 ||
2145  Opc == AMDGPU::V_MAC_F16_e64)
2146  UseMI.untieRegOperand(
2147  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
2148 
2149  // ChangingToImmediate adds Src2 back to the instruction.
2150  Src2->ChangeToImmediate(Imm);
2151 
2152  // These come before src2.
2153  removeModOperands(UseMI);
2154  UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16));
2155 
2156  bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
2157  if (DeleteDef)
2158  DefMI.eraseFromParent();
2159 
2160  return true;
2161  }
2162  }
2163 
2164  return false;
2165 }
2166 
2167 static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
2168  int WidthB, int OffsetB) {
2169  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
2170  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
2171  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
2172  return LowOffset + LowWidth <= HighOffset;
2173 }
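offsetsDoNotOverlap is a plain disjointness test on half-open byte ranges [Offset, Offset + Width): the lower access must end at or before the higher one begins. A standalone restatement with a couple of sample values (the values are illustrative):

#include <algorithm>
#include <cassert>

static bool disjointAccesses(int WidthA, int OffsetA, int WidthB, int OffsetB) {
  int LowOffset  = std::min(OffsetA, OffsetB);
  int HighOffset = std::max(OffsetA, OffsetB);
  int LowWidth   = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;   // lower range ends before higher starts
}

int main() {
  assert(disjointAccesses(4, 0, 4, 4));    // [0,4) and [4,8) are disjoint
  assert(!disjointAccesses(8, 0, 4, 4));   // [0,8) overlaps [4,8)
}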
2174 
2175 bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
2176  MachineInstr &MIb) const {
2177  MachineOperand *BaseOp0, *BaseOp1;
2178  int64_t Offset0, Offset1;
2179 
2180  if (getMemOperandWithOffset(MIa, BaseOp0, Offset0, &RI) &&
2181  getMemOperandWithOffset(MIb, BaseOp1, Offset1, &RI)) {
2182  if (!BaseOp0->isIdenticalTo(*BaseOp1))
2183  return false;
2184 
2185  if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) {
2186  // FIXME: Handle ds_read2 / ds_write2.
2187  return false;
2188  }
2189  unsigned Width0 = (*MIa.memoperands_begin())->getSize();
2190  unsigned Width1 = (*MIb.memoperands_begin())->getSize();
2191  if (offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
2192  return true;
2193  }
2194  }
2195 
2196  return false;
2197 }
2198 
2199 bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa,
2200  MachineInstr &MIb,
2201  AliasAnalysis *AA) const {
2202  assert((MIa.mayLoad() || MIa.mayStore()) &&
2203  "MIa must load from or modify a memory location");
2204  assert((MIb.mayLoad() || MIb.mayStore()) &&
2205  "MIb must load from or modify a memory location");
2206 
2207  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects())
2208  return false;
2209 
2210  // XXX - Can we relax this between address spaces?
2211  if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
2212  return false;
2213 
2214  if (AA && MIa.hasOneMemOperand() && MIb.hasOneMemOperand()) {
2215  const MachineMemOperand *MMOa = *MIa.memoperands_begin();
2216  const MachineMemOperand *MMOb = *MIb.memoperands_begin();
2217  if (MMOa->getValue() && MMOb->getValue()) {
2218  MemoryLocation LocA(MMOa->getValue(), MMOa->getSize(), MMOa->getAAInfo());
2219  MemoryLocation LocB(MMOb->getValue(), MMOb->getSize(), MMOb->getAAInfo());
2220  if (!AA->alias(LocA, LocB))
2221  return true;
2222  }
2223  }
2224 
2225  // TODO: Should we check the address space from the MachineMemOperand? That
2226  // would allow us to distinguish objects we know don't alias based on the
2227  // underlying address space, even if it was lowered to a different one,
2228  // e.g. private accesses lowered to use MUBUF instructions on a scratch
2229  // buffer.
2230  if (isDS(MIa)) {
2231  if (isDS(MIb))
2232  return checkInstOffsetsDoNotOverlap(MIa, MIb);
2233 
2234  return !isFLAT(MIb) || isSegmentSpecificFLAT(MIb);
2235  }
2236 
2237  if (isMUBUF(MIa) || isMTBUF(MIa)) {
2238  if (isMUBUF(MIb) || isMTBUF(MIb))
2239  return checkInstOffsetsDoNotOverlap(MIa, MIb);
2240 
2241  return !isFLAT(MIb) && !isSMRD(MIb);
2242  }
2243 
2244  if (isSMRD(MIa)) {
2245  if (isSMRD(MIb))
2246  return checkInstOffsetsDoNotOverlap(MIa, MIb);
2247 
2248  return !isFLAT(MIb) && !isMUBUF(MIa) && !isMTBUF(MIa);
2249  }
2250 
2251  if (isFLAT(MIa)) {
2252  if (isFLAT(MIb))
2253  return checkInstOffsetsDoNotOverlap(MIa, MIb);
2254 
2255  return false;
2256  }
2257 
2258  return false;
2259 }
2260 
2261 static int64_t getFoldableImm(const MachineOperand* MO) {
2262  if (!MO->isReg())
2263  return false;
2264  const MachineFunction *MF = MO->getParent()->getParent()->getParent();
2265  const MachineRegisterInfo &MRI = MF->getRegInfo();
2266  auto Def = MRI.getUniqueVRegDef(MO->getReg());
2267  if (Def && Def->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
2268  Def->getOperand(1).isImm())
2269  return Def->getOperand(1).getImm();
2270  return AMDGPU::NoRegister;
2271 }
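An editorial observation, not stated in the code itself: getFoldableImm reports "nothing to fold" as 0 (both the early return and AMDGPU::NoRegister are zero), and the callers below test the result for truth, so a genuine foldable immediate of 0 is also skipped. The caller pattern, roughly:

//   if (auto Imm = getFoldableImm(Src2)) {  // false for a zero immediate too
//     ... build the V_MADAK form with Imm ...
//   }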
2272 
2273 MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
2274  MachineInstr &MI,
2275  LiveVariables *LV) const {
2276  unsigned Opc = MI.getOpcode();
2277  bool IsF16 = false;
2278  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64;
2279 
2280  switch (Opc) {
2281  default:
2282  return nullptr;
2283  case AMDGPU::V_MAC_F16_e64:
2284  IsF16 = true;
2285  LLVM_FALLTHROUGH;
2286  case AMDGPU::V_MAC_F32_e64:
2287  case AMDGPU::V_FMAC_F32_e64:
2288  break;
2289  case AMDGPU::V_MAC_F16_e32:
2290  IsF16 = true;
2291  LLVM_FALLTHROUGH;
2292  case AMDGPU::V_MAC_F32_e32:
2293  case AMDGPU::V_FMAC_F32_e32: {
2294  int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
2295  AMDGPU::OpName::src0);
2296  const MachineOperand *Src0 = &MI.getOperand(Src0Idx);
2297  if (!Src0->isReg() && !Src0->isImm())
2298  return nullptr;
2299 
2300  if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0))
2301  return nullptr;
2302 
2303  break;
2304  }
2305  }
2306 
2307  const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
2308  const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
2309  const MachineOperand *Src0Mods =
2310  getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
2311  const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
2312  const MachineOperand *Src1Mods =
2313  getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
2314  const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
2315  const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
2316  const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
2317 
2318  if (!IsFMA && !Src0Mods && !Src1Mods && !Clamp && !Omod &&
2319  // If we have an SGPR input, we will violate the constant bus restriction.
2320  (!Src0->isReg() || !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
2321  if (auto Imm = getFoldableImm(Src2)) {
2322  return BuildMI(*MBB, MI, MI.getDebugLoc(),
2323  get(IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32))
2324  .add(*Dst)
2325  .add(*Src0)
2326  .add(*Src1)
2327  .addImm(Imm);
2328  }
2329  if (auto Imm = getFoldableImm(Src1)) {
2330  return BuildMI(*MBB, MI, MI.getDebugLoc(),
2331  get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
2332  .add(*Dst)
2333  .add(*Src0)
2334  .addImm(Imm)
2335  .add(*Src2);
2336  }
2337  if (auto Imm = getFoldableImm(Src0)) {
2338  if (isOperandLegal(MI, AMDGPU::getNamedOperandIdx(AMDGPU::V_MADMK_F32,
2339  AMDGPU::OpName::src0), Src1))
2340  return BuildMI(*MBB, MI, MI.getDebugLoc(),
2341  get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
2342  .add(*Dst)
2343  .add(*Src1)
2344  .addImm(Imm)
2345  .add(*Src2);
2346  }
2347  }
2348 
2349  assert((!IsFMA || !IsF16) && "fmac only expected with f32");
2350  unsigned NewOpc = IsFMA ? AMDGPU::V_FMA_F32 :
2351  (IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32);
2352  return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
2353  .add(*Dst)
2354  .addImm(Src0Mods ? Src0Mods->getImm() : 0)
2355  .add(*Src0)
2356  .addImm(Src1Mods ? Src1Mods->getImm() : 0)
2357  .add(*Src1)
2358  .addImm(0) // Src mods
2359  .add(*Src2)
2360  .addImm(Clamp ? Clamp->getImm() : 0)
2361  .addImm(Omod ? Omod->getImm() : 0);
2362 }
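Arithmetically, the MAC/FMAC forms rewritten above all compute a multiply-add; the MADMK/MADAK variants only differ in which operand is the literal constant. A scalar sketch of the three shapes (plain C++, not GPU code; the helper names are illustrative):

#include <cassert>

// v_mad_f32   dst, a, b, c : dst = a * b + c
float mad(float A, float B, float C)   { return A * B + C; }
// v_madmk_f32 dst, a, K, c : the multiplied operand is the constant K
float madmk(float A, float K, float C) { return A * K + C; }
// v_madak_f32 dst, a, b, K : the added operand is the constant K
float madak(float A, float B, float K) { return A * B + K; }

int main() {
  assert(mad(2, 3, 4) == 10.0f);
  assert(madmk(2, 3, 4) == 10.0f);  // same product and addend in this example
  assert(madak(2, 3, 4) == 10.0f);
}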
2363 
2364 // It's not generally safe to move VALU instructions across these since it will
2365 // start using the register as a base index rather than directly.
2366 // XXX - Why isn't hasSideEffects sufficient for these?
2367 static bool changesVGPRIndexingMode(const MachineInstr &MI) {
2368  switch (MI.getOpcode()) {
2369  case AMDGPU::S_SET_GPR_IDX_ON:
2370  case AMDGPU::S_SET_GPR_IDX_MODE:
2371  case AMDGPU::S_SET_GPR_IDX_OFF:
2372  return true;
2373  default:
2374  return false;
2375  }
2376 }
2377 
2378 bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
2379  const MachineBasicBlock *MBB,
2380  const MachineFunction &MF) const {
2381  // XXX - Do we want the SP check in the base implementation?
2382 
2383  // Target-independent instructions do not have an implicit-use of EXEC, even
2384  // when they operate on VGPRs. Treating EXEC modifications as scheduling
2385  // boundaries prevents incorrect movements of such instructions.
2386  return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF) ||
2387  MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
2388  MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
2389  MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
2390  changesVGPRIndexingMode(MI);
2391 }
2392 
2393 bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
2394  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
2395  Opcode == AMDGPU::DS_GWS_INIT ||
2396  Opcode == AMDGPU::DS_GWS_SEMA_V ||
2397  Opcode == AMDGPU::DS_GWS_SEMA_BR ||
2398  Opcode == AMDGPU::DS_GWS_SEMA_P ||
2399  Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
2400  Opcode == AMDGPU::DS_GWS_BARRIER;
2401 }
2402 
2403 bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
2404  unsigned Opcode = MI.getOpcode();
2405 
2406  if (MI.mayStore() && isSMRD(MI))
2407  return true; // scalar store or atomic
2408 
2409  // These instructions cause shader I/O that may cause hardware lockups
2410  // when executed with an empty EXEC mask.
2411  //
2412  // Note: exp with VM = DONE = 0 is automatically skipped by hardware when
2413  // EXEC = 0, but checking for that case here seems not worth it
2414  // given the typical code patterns.
2415  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
2416  Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE ||
2417  Opcode == AMDGPU::DS_ORDERED_COUNT)
2418  return true;
2419 
2420  if (MI.isInlineAsm())
2421  return true; // conservative assumption
2422 
2423  // These are like SALU instructions in terms of effects, so it's questionable
2424  // whether we should return true for those.
2425  //
2426  // However, executing them with EXEC = 0 causes them to operate on undefined
2427  // data, which we avoid by returning true here.
2428  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || Opcode == AMDGPU::V_READLANE_B32)
2429  return true;
2430 
2431  return false;
2432 }
2433 
2434 bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
2435  switch (Imm.getBitWidth()) {
2436  case 32:
2437  return AMDGPU::isInlinableLiteral32(Imm.getSExtValue(),
2438  ST.hasInv2PiInlineImm());
2439  case 64:
2440  return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(),
2441  ST.hasInv2PiInlineImm());
2442  case 16:
2443  return ST.has16BitInsts() &&
2444  AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
2445  ST.hasInv2PiInlineImm());
2446  default:
2447  llvm_unreachable("invalid bitwidth");
2448  }
2449 }
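As a reference for the checks above, here is a hedged standalone sketch of the usual GCN 32-bit inline-constant set; the real test lives in AMDGPU::isInlinableLiteral32, and the exact set below is an assumption for illustration.

#include <cassert>
#include <cstdint>
#include <cstring>

// Assumed set: integers -16..64, the floats {0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0},
// and 1/(2*pi) only when the subtarget advertises it (HasInv2Pi).
static bool isLikelyInlinable32(int32_t Imm, bool HasInv2Pi) {
  if (Imm >= -16 && Imm <= 64)           // small integers (0.0 falls in here)
    return true;
  float F;
  std::memcpy(&F, &Imm, sizeof(F));      // reinterpret the bit pattern as f32
  if (F == 0.5f || F == -0.5f || F == 1.0f || F == -1.0f ||
      F == 2.0f || F == -2.0f || F == 4.0f || F == -4.0f)
    return true;
  return HasInv2Pi && Imm == 0x3e22f983; // bit pattern of 0.15915494 (1/(2*pi))
}

int main() {
  assert(isLikelyInlinable32(-16, false));
  assert(!isLikelyInlinable32(65, false));
  assert(isLikelyInlinable32(0x3e22f983, true));
}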
2450 
2451 bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
2452  uint8_t OperandType) const {
2453  if (!MO.isImm() ||
2454  OperandType < AMDGPU::OPERAND_SRC_FIRST ||
2455  OperandType > AMDGPU::OPERAND_SRC_LAST)
2456  return false;
2457 
2458  // MachineOperand provides no way to tell the true operand size, since it only
2459  // records a 64-bit value. We need to know the size to determine if a 32-bit
2460  // floating point immediate bit pattern is legal for an integer immediate. It
2461  // would be for any 32-bit integer operand, but would not be for a 64-bit one.
2462 
2463  int64_t Imm = MO.getImm();
2464  switch (OperandType) {
2469  int32_t Trunc = static_cast<int32_t>(Imm);
2470  return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
2471  }
2477  ST.hasInv2PiInlineImm());
2482  if (isInt<16>(Imm) || isUInt<16>(Imm)) {
2483  // A few special case instructions have 16-bit operands on subtargets
2484  // where 16-bit instructions are not legal.
2485  // TODO: Do the 32-bit immediates work? We shouldn't really need to handle
2486  // constants in these cases
2487  int16_t Trunc = static_cast<int16_t>(Imm);
2488  return ST.has16BitInsts() &&
2489  AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
2490  }
2491 
2492  return false;
2493  }
2496  if (isUInt<16>(Imm)) {
2497  int16_t Trunc = static_cast<int16_t>(Imm);
2498  return ST.has16BitInsts() &&
2499  AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
2500  }
2501  if (!(Imm & 0xffff)) {
2502  return ST.has16BitInsts() &&
2503  AMDGPU::isInlinableLiteral16(Imm >> 16, ST.hasInv2PiInlineImm());
2504  }
2505  uint32_t Trunc = static_cast<uint32_t>(Imm);
2506  return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
2507  }
2508  default:
2509  llvm_unreachable("invalid bitwidth");
2510  }
2511 }
2512 
2513 bool SIInstrInfo::isLiteralConstantLike(const MachineOperand &MO,
2514  const MCOperandInfo &OpInfo) const {
2515  switch (MO.getType()) {
2517  return false;
2519  return !isInlineConstant(MO, OpInfo);
2525  return true;
2526  default:
2527  llvm_unreachable("unexpected operand type");
2528  }
2529 }
2530 
2531 static bool compareMachineOp(const MachineOperand &Op0,
2532  const MachineOperand &Op1) {
2533  if (Op0.getType() != Op1.getType())
2534  return false;
2535 
2536  switch (Op0.getType()) {
2537  case MachineOperand::MO_Register:
2538  return Op0.getReg() == Op1.getReg();
2539  case MachineOperand::MO_Immediate:
2540  return Op0.getImm() == Op1.getImm();
2541  default:
2542  llvm_unreachable("Didn't expect to be comparing these operand types");
2543  }
2544 }
2545 
2546 bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
2547  const MachineOperand &MO) const {
2548  const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo];
2549 
2550  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
2551 
2552  if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
2553  return true;
2554 
2555  if (OpInfo.RegClass < 0)
2556  return false;
2557 
2558  if (MO.isImm() && isInlineConstant(MO, OpInfo))
2559  return RI.opCanUseInlineConstant(OpInfo.OperandType);
2560 
2561  return RI.opCanUseLiteralConstant(OpInfo.OperandType);
2562 }
2563 
2564 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
2565  int Op32 = AMDGPU::getVOPe32(Opcode);
2566  if (Op32 == -1)
2567  return false;
2568 
2569  return pseudoToMCOpcode(Op32) != -1;
2570 }
2571 
2572 bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
2573  // The src0_modifier operand is present on all instructions
2574  // that have modifiers.
2575 
2576  return AMDGPU::getNamedOperandIdx(Opcode,
2577  AMDGPU::OpName::src0_modifiers) != -1;
2578 }
2579 
2580 bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
2581  unsigned OpName) const {
2582  const MachineOperand *Mods = getNamedOperand(MI, OpName);
2583  return Mods && Mods->getImm();
2584 }
2585 
2586 bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const {
2587  return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
2588  hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
2589  hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) ||
2590  hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
2591  hasModifiersSet(MI, AMDGPU::OpName::omod);
2592 }
2593 
2594 bool SIInstrInfo::canShrink(const MachineInstr &MI,
2595  const MachineRegisterInfo &MRI) const {
2596  const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
2597  // Can't shrink instruction with three operands.
2598  // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
2599  // a special case for it. It can only be shrunk if the third operand
2600  // is vcc. We should handle this the same way we handle vopc, by adding
2601  // a register allocation hint pre-regalloc and then do the shrinking
2602  // post-regalloc.
2603  if (Src2) {
2604  switch (MI.getOpcode()) {
2605  default: return false;
2606 
2607  case AMDGPU::V_ADDC_U32_e64:
2608  case AMDGPU::V_SUBB_U32_e64:
2609  case AMDGPU::V_SUBBREV_U32_e64: {
2610  const MachineOperand *Src1
2611  = getNamedOperand(MI, AMDGPU::OpName::src1);
2612  if (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()))
2613  return false;
2614  // Additional verification is needed for sdst/src2.
2615  return true;
2616  }
2617  case AMDGPU::V_MAC_F32_e64:
2618  case AMDGPU::V_MAC_F16_e64:
2619  case AMDGPU::V_FMAC_F32_e64:
2620  if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) ||
2621  hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
2622  return false;
2623  break;
2624 
2625  case AMDGPU::V_CNDMASK_B32_e64:
2626  break;
2627  }
2628  }
2629 
2630  const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
2631  if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||
2632  hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers)))
2633  return false;
2634 
2635  // We don't need to check src0, all input types are legal, so just make sure
2636  // src0 isn't using any modifiers.
2637  if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
2638  return false;
2639 
2640  // Can it be shrunk to a valid 32 bit opcode?
2641  if (!hasVALU32BitEncoding(MI.getOpcode()))
2642  return false;
2643 
2644  // Check output modifiers
2645  return !hasModifiersSet(MI, AMDGPU::OpName::omod) &&
2646  !hasModifiersSet(MI, AMDGPU::OpName::clamp);
2647 }
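In assembly terms, canShrink asks whether the VOP3 encoding is only carrying defaults that the 32-bit VOP2 encoding can also express. A hedged illustration (instruction spellings approximate):

// Shrinkable:     v_add_f32_e64 v0, v1, v2         -> v_add_f32_e32 v0, v1, v2
// Not shrinkable: v_add_f32_e64 v0, -v1, v2        (src modifier)
// Not shrinkable: v_add_f32_e64 v0, v1, s2         (src1 must be a VGPR in e32)
// Not shrinkable: v_add_f32_e64 v0, v1, v2 clamp   (output modifier)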
2648 
2649 // Set VCC operand with all flags from \p Orig, except for setting it as
2650 // implicit.
2651 static void copyFlagsToImplicitVCC(MachineInstr &MI,
2652  const MachineOperand &Orig) {
2653 
2654  for (MachineOperand &Use : MI.implicit_operands()) {
2655  if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
2656  Use.setIsUndef(Orig.isUndef());
2657  Use.setIsKill(Orig.isKill());
2658  return;
2659  }
2660  }
2661 }
2662 
2663 MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
2664  unsigned Op32) const {
2665  MachineBasicBlock *MBB = MI.getParent();
2666  MachineInstrBuilder Inst32 =
2667  BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32));
2668 
2669  // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
2670  // For VOPC instructions, this is replaced by an implicit def of vcc.
2671  int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
2672  if (Op32DstIdx != -1) {
2673  // dst
2674  Inst32.add(MI.getOperand(0));
2675  } else {
2676  assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
2677  "Unexpected case");
2678  }
2679 
2680  Inst32.add(*getNamedOperand(MI, AMDGPU::OpName::src0));
2681 
2682  const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
2683  if (Src1)
2684  Inst32.add(*Src1);
2685 
2686  const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
2687 
2688  if (Src2) {
2689  int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
2690  if (Op32Src2Idx != -1) {
2691  Inst32.add(*Src2);
2692  } else {
2693  // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
2694  // replaced with an implicit read of vcc. This was already added
2695  // during the initial BuildMI, so find it to preserve the flags.
2696  copyFlagsToImplicitVCC(*Inst32, *Src2);
2697  }
2698  }
2699 
2700  return Inst32;
2701 }
2702 
2703 bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
2704  const MachineOperand &MO,
2705  const MCOperandInfo &OpInfo) const {
2706  // Literal constants use the constant bus.
2707  //if (isLiteralConstantLike(MO, OpInfo))
2708  // return true;
2709  if (MO.isImm())
2710  return !isInlineConstant(MO, OpInfo);
2711 
2712  if (!MO.isReg())
2713  return true; // Misc other operands like FrameIndex
2714 
2715  if (!MO.isUse())
2716  return false;
2717 
2718  if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
2719  return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
2720 
2721  // FLAT_SCR is just an SGPR pair.
2722  if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
2723  return true;
2724 
2725  // EXEC register uses the constant bus.
2726  if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
2727  return true;
2728 
2729  // SGPRs use the constant bus
2730  return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 ||
2731  (!MO.isImplicit() &&
2732  (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
2733  AMDGPU::SGPR_64RegClass.contains(MO.getReg()))));
2734 }
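This predicate feeds the single-use rule checked later in verifyInstruction. A hedged summary of what it treats as a constant-bus read on these subtargets:

// Counts against the constant bus: a literal (non-inline) immediate, any SGPR
// source, and explicit reads of VCC, M0, EXEC or FLAT_SCR. VGPR sources and
// inline constants do not count, and these encodings allow at most one
// constant-bus read per instruction.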
2735 
2736 static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
2737  for (const MachineOperand &MO : MI.implicit_operands()) {
2738  // We only care about reads.
2739  if (MO.isDef())
2740  continue;
2741 
2742  switch (MO.getReg()) {
2743  case AMDGPU::VCC:
2744  case AMDGPU::M0:
2745  case AMDGPU::FLAT_SCR:
2746  return MO.getReg();
2747 
2748  default:
2749  break;
2750  }
2751  }
2752 
2753  return AMDGPU::NoRegister;
2754 }
2755 
2756 static bool shouldReadExec(const MachineInstr &MI) {
2757  if (SIInstrInfo::isVALU(MI)) {
2758  switch (MI.getOpcode()) {
2759  case AMDGPU::V_READLANE_B32:
2760  case AMDGPU::V_READLANE_B32_si:
2761  case AMDGPU::V_READLANE_B32_vi:
2762  case AMDGPU::V_WRITELANE_B32:
2763  case AMDGPU::V_WRITELANE_B32_si:
2764  case AMDGPU::V_WRITELANE_B32_vi:
2765  return false;
2766  }
2767 
2768  return true;
2769  }
2770 
2771  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
2772  SIInstrInfo::isSALU(MI) ||
2773  SIInstrInfo::isSMRD(MI))
2774  return false;
2775 
2776  return true;
2777 }
2778 
2779 static bool isSubRegOf(const SIRegisterInfo &TRI,
2780  const MachineOperand &SuperVec,
2781  const MachineOperand &SubReg) {
2782  if (TargetRegisterInfo::isPhysicalRegister(SubReg.getReg()))
2783  return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg());
2784 
2785  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
2786  SubReg.getReg() == SuperVec.getReg();
2787 }
2788 
2789 bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
2790  StringRef &ErrInfo) const {
2791  uint16_t Opcode = MI.getOpcode();
2792  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))
2793  return true;
2794 
2795  const MachineFunction *MF = MI.getParent()->getParent();
2796  const MachineRegisterInfo &MRI = MF->getRegInfo();
2797 
2798  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2799  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2800  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2801 
2802  // Make sure the number of operands is correct.
2803  const MCInstrDesc &Desc = get(Opcode);
2804  if (!Desc.isVariadic() &&
2805  Desc.getNumOperands() != MI.getNumExplicitOperands()) {
2806  ErrInfo = "Instruction has wrong number of operands.";
2807  return false;
2808  }
2809 
2810  if (MI.isInlineAsm()) {
2811  // Verify register classes for inlineasm constraints.
2812  for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands();
2813  I != E; ++I) {
2814  const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI);
2815  if (!RC)
2816  continue;
2817 
2818  const MachineOperand &Op = MI.getOperand(I);
2819  if (!Op.isReg())
2820  continue;
2821 
2822  unsigned Reg = Op.getReg();
2823  if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) {
2824  ErrInfo = "inlineasm operand has incorrect register class.";
2825  return false;
2826  }
2827  }
2828 
2829  return true;
2830  }
2831 
2832  // Make sure the register classes are correct.
2833  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
2834  if (MI.getOperand(i).isFPImm()) {
2835  ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
2836  "all fp values to integers.";
2837  return false;
2838  }
2839 
2840  int RegClass = Desc.OpInfo[i].RegClass;
2841 
2842  switch (Desc.OpInfo[i].OperandType) {
2844  if (MI.getOperand(i).isImm()) {
2845  ErrInfo = "Illegal immediate value for operand.";
2846  return false;
2847  }
2848  break;
2851  break;
2858  const MachineOperand &MO = MI.getOperand(i);
2859  if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) {
2860  ErrInfo = "Illegal immediate value for operand.";
2861  return false;
2862  }
2863  break;
2864  }
2867  // Check if this operand is an immediate.
2868  // FrameIndex operands will be replaced by immediates, so they are
2869  // allowed.
2870  if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
2871  ErrInfo = "Expected immediate, but got non-immediate";
2872  return false;
2873  }
2875  default:
2876  continue;
2877  }
2878 
2879  if (!MI.getOperand(i).isReg())
2880  continue;
2881 
2882  if (RegClass != -1) {
2883  unsigned Reg = MI.getOperand(i).getReg();
2884  if (Reg == AMDGPU::NoRegister ||
2885  TargetRegisterInfo::isVirtualRegister(Reg))
2886  continue;
2887 
2888  const TargetRegisterClass *RC = RI.getRegClass(RegClass);
2889  if (!RC->contains(Reg)) {
2890  ErrInfo = "Operand has incorrect register class.";
2891  return false;
2892  }
2893  }
2894  }
2895 
2896  // Verify SDWA
2897  if (isSDWA(MI)) {
2898  if (!ST.hasSDWA()) {
2899  ErrInfo = "SDWA is not supported on this target";
2900  return false;
2901  }
2902 
2903  int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2904 
2905  const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };
2906 
2907  for (int OpIdx: OpIndicies) {
2908  if (OpIdx == -1)
2909  continue;
2910  const MachineOperand &MO = MI.getOperand(OpIdx);
2911 
2912  if (!ST.hasSDWAScalar()) {
2913  // Only VGPRS on VI
2914  if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
2915  ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
2916  return false;
2917  }
2918  } else {
2919  // No immediates on GFX9
2920  if (!MO.isReg()) {
2921  ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9";
2922  return false;
2923  }
2924  }
2925  }
2926 
2927  if (!ST.hasSDWAOmod()) {
2928  // No omod allowed on VI
2929  const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
2930  if (OMod != nullptr &&
2931  (!OMod->isImm() || OMod->getImm() != 0)) {
2932  ErrInfo = "OMod not allowed in SDWA instructions on VI";
2933  return false;
2934  }
2935  }
2936 
2937  uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode);
2938  if (isVOPC(BasicOpcode)) {
2939  if (!ST.hasSDWASdst() && DstIdx != -1) {
2940  // Only vcc allowed as dst on VI for VOPC
2941  const MachineOperand &Dst = MI.getOperand(DstIdx);
2942  if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
2943  ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
2944  return false;
2945  }
2946  } else if (!ST.hasSDWAOutModsVOPC()) {
2947  // No clamp allowed on GFX9 for VOPC
2948  const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
2949  if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
2950  ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
2951  return false;
2952  }
2953 
2954  // No omod allowed on GFX9 for VOPC
2955  const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
2956  if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
2957  ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
2958  return false;
2959  }
2960  }
2961  }
2962 
2963  const MachineOperand *DstUnused = getNamedOperand(MI, AMDGPU::OpName::dst_unused);
2964  if (DstUnused && DstUnused->isImm() &&
2965  DstUnused->getImm() == AMDGPU::SDWA::UNUSED_PRESERVE) {
2966  const MachineOperand &Dst = MI.getOperand(DstIdx);
2967  if (!Dst.isReg() || !Dst.isTied()) {
2968  ErrInfo = "Dst register should have tied register";
2969  return false;
2970  }
2971 
2972  const MachineOperand &TiedMO =
2973  MI.getOperand(MI.findTiedOperandIdx(DstIdx));
2974  if (!TiedMO.isReg() || !TiedMO.isImplicit() || !TiedMO.isUse()) {
2975  ErrInfo =
2976  "Dst register should be tied to implicit use of preserved register";
2977  return false;
2978  } else if (TargetRegisterInfo::isPhysicalRegister(TiedMO.getReg()) &&
2979  Dst.getReg() != TiedMO.getReg()) {
2980  ErrInfo = "Dst register should use same physical register as preserved";
2981  return false;
2982  }
2983  }
2984  }
2985 
2986  // Verify MIMG
2987  if (isMIMG(MI.getOpcode()) && !MI.mayStore()) {
2988  // Ensure that the return type used is large enough for all the options
2989  // being used. TFE/LWE require an extra result register.
2990  const MachineOperand *DMask = getNamedOperand(MI, AMDGPU::OpName::dmask);
2991  if (DMask) {
2992  uint64_t DMaskImm = DMask->getImm();
2993  uint32_t RegCount =
2994  isGather4(MI.getOpcode()) ? 4 : countPopulation(DMaskImm);
2995  const MachineOperand *TFE = getNamedOperand(MI, AMDGPU::OpName::tfe);
2996  const MachineOperand *LWE = getNamedOperand(MI, AMDGPU::OpName::lwe);
2997  const MachineOperand *D16 = getNamedOperand(MI, AMDGPU::OpName::d16);
2998 
2999  // Adjust for packed 16 bit values
3000  if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
3001  RegCount >>= 1;
3002 
3003  // Adjust if using LWE or TFE
3004  if ((LWE && LWE->getImm()) || (TFE && TFE->getImm()))
3005  RegCount += 1;
3006 
3007  const uint32_t DstIdx =
3008  AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
3009  const MachineOperand &Dst = MI.getOperand(DstIdx);
3010  if (Dst.isReg()) {
3011  const TargetRegisterClass *DstRC = getOpRegClass(MI, DstIdx);
3012  uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
3013  if (RegCount > DstSize) {
3014  ErrInfo = "MIMG instruction returns too many registers for dst "
3015  "register class";
3016  return false;
3017  }
3018  }
3019  }
3020  }
3021 
3022  // Verify VOP*. Ignore multiple sgpr operands on writelane.
3023  if (Desc.getOpcode() != AMDGPU::V_WRITELANE_B32
3024  && (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI))) {
3025  // Only look at the true operands. Only a real operand can use the constant
3026  // bus, and we don't want to check pseudo-operands like the source modifier
3027  // flags.
3028  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3029 
3030  unsigned ConstantBusCount = 0;
3031  unsigned LiteralCount = 0;
3032 
3033  if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
3034  ++ConstantBusCount;
3035 
3036  unsigned SGPRUsed = findImplicitSGPRRead(MI);
3037  if (SGPRUsed != AMDGPU::NoRegister)
3038  ++ConstantBusCount;
3039 
3040  for (int OpIdx : OpIndices) {
3041  if (OpIdx == -1)
3042  break;
3043  const MachineOperand &MO = MI.getOperand(OpIdx);
3044  if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
3045  if (MO.isReg()) {
3046  if (MO.getReg() != SGPRUsed)
3047  ++ConstantBusCount;
3048  SGPRUsed = MO.getReg();
3049  } else {
3050  ++ConstantBusCount;
3051  ++LiteralCount;
3052  }
3053  }
3054  }
3055  if (ConstantBusCount > 1) {
3056  ErrInfo = "VOP* instruction uses the constant bus more than once";
3057  return false;
3058  }
3059 
3060  if (isVOP3(MI) && LiteralCount) {
3061  ErrInfo = "VOP3 instruction uses literal";
3062  return false;
3063  }
3064  }
3065 
3066  // Verify misc. restrictions on specific instructions.
3067  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
3068  Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
3069  const MachineOperand &Src0 = MI.getOperand(Src0Idx);
3070  const MachineOperand &Src1 = MI.getOperand(Src1Idx);
3071  const MachineOperand &Src2 = MI.getOperand(Src2Idx);
3072  if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
3073  if (!compareMachineOp(Src0, Src1) &&
3074  !compareMachineOp(Src0, Src2)) {
3075  ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
3076  return false;
3077  }
3078  }
3079  }
3080 
3081  if (isSOPK(MI)) {
3082  int64_t Imm = getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
3083  if (sopkIsZext(MI)) {
3084  if (!isUInt<16>(Imm)) {
3085  ErrInfo = "invalid immediate for SOPK instruction";
3086  return false;
3087  }
3088  } else {
3089  if (!isInt<16>(Imm)) {
3090  ErrInfo = "invalid immediate for SOPK instruction";
3091  return false;
3092  }
3093  }
3094  }
3095 
3096  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
3097  Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
3098  Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
3099  Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
3100  const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
3101  Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
3102 
3103  const unsigned StaticNumOps = Desc.getNumOperands() +
3104  Desc.getNumImplicitUses();
3105  const unsigned NumImplicitOps = IsDst ? 2 : 1;
3106 
3107  // Allow additional implicit operands. This allows a fixup done by the post
3108  // RA scheduler where the main implicit operand is killed and implicit-defs
3109  // are added for sub-registers that remain live after this instruction.
3110  if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
3111  ErrInfo = "missing implicit register operands";
3112  return false;
3113  }
3114 
3115  const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
3116  if (IsDst) {
3117  if (!Dst->isUse()) {
3118  ErrInfo = "v_movreld_b32 vdst should be a use operand";
3119  return false;
3120  }
3121 
3122  unsigned UseOpIdx;
3123  if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
3124  UseOpIdx != StaticNumOps + 1) {
3125  ErrInfo = "movrel implicit operands should be tied";
3126  return false;
3127  }
3128  }
3129 
3130  const MachineOperand &Src0 = MI.getOperand(Src0Idx);
3131  const MachineOperand &ImpUse
3132  = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
3133  if (!ImpUse.isReg() || !ImpUse.isUse() ||
3134  !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
3135  ErrInfo = "src0 should be subreg of implicit vector use";
3136  return false;
3137  }
3138  }
3139 
3140  // Make sure we aren't losing exec uses in the td files. This mostly requires
3141  // being careful when using let Uses to try to add other use registers.
3142  if (shouldReadExec(MI)) {
3143  if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
3144  ErrInfo = "VALU instruction does not implicitly read exec mask";
3145  return false;
3146  }
3147  }
3148 
3149  if (isSMRD(MI)) {
3150  if (MI.mayStore()) {
3151  // The register offset form of scalar stores may only use m0 as the
3152  // soffset register.
3153  const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff);
3154  if (Soff && Soff->getReg() != AMDGPU::M0) {
3155  ErrInfo = "scalar stores must use m0 as offset register";
3156  return false;
3157  }
3158  }
3159  }
3160 
3161  if (isFLAT(MI) && !MF->getSubtarget<GCNSubtarget>().hasFlatInstOffsets()) {
3162  const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
3163  if (Offset->getImm() != 0) {
3164  ErrInfo = "subtarget does not support offsets in flat instructions";
3165  return false;
3166  }
3167  }
3168 
3169  const MachineOperand *DppCt = getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl);
3170  if (DppCt) {
3171  using namespace AMDGPU::DPP;
3172 
3173  unsigned DC = DppCt->getImm();
3174  if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
3175  DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
3180  ErrInfo = "Invalid dpp_ctrl value";
3181  return false;
3182  }
3183  }
3184 
3185  return true;
3186 }
3187 
3188 unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
3189  switch (MI.getOpcode()) {
3190  default: return AMDGPU::INSTRUCTION_LIST_END;
3191  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
3192  case AMDGPU::COPY: return AMDGPU::COPY;
3193  case AMDGPU::PHI: return AMDGPU::PHI;
3194  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
3195  case AMDGPU::WQM: return AMDGPU::WQM;
3196  case AMDGPU::WWM: return AMDGPU::WWM;
3197  case AMDGPU::S_MOV_B32:
3198  return MI.getOperand(1).isReg() ?
3199  AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
3200  case AMDGPU::S_ADD_I32:
3201  return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_I32_e32;
3202  case AMDGPU::S_ADDC_U32:
3203  return AMDGPU::V_ADDC_U32_e32;
3204  case AMDGPU::S_SUB_I32:
3205  return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
3206  // FIXME: These are not consistently handled, and selected when the carry is
3207  // used.
3208  case AMDGPU::S_ADD_U32:
3209  return AMDGPU::V_ADD_I32_e32;
3210  case AMDGPU::S_SUB_U32:
3211  return AMDGPU::V_SUB_I32_e32;
3212  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
3213  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
3214  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
3215  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
3216  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
3217  case AMDGPU::S_XNOR_B32:
3218  return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
3219  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
3220  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
3221  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
3222  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
3223  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
3224  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
3225  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
3226  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
3227  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
3228  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
3229  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
3230  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
3231  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
3232  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
3233  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
3234  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
3235  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
3236  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
3237  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
3238  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
3239  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
3240  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
3241  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
3242  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
3243  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
3244  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
3245  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
3246  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
3247  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
3248  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
3249  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32;
3250  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32;
3251  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
3252  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
3253  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
3254  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
3255  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
3256  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
3257  }
3258 }
3259 
3260 const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
3261  unsigned OpNo) const {
3262  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
3263  const MCInstrDesc &Desc = get(MI.getOpcode());
3264  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
3265  Desc.OpInfo[OpNo].RegClass == -1) {
3266  unsigned Reg = MI.getOperand(OpNo).getReg();
3267 
3268  if (TargetRegisterInfo::isVirtualRegister(Reg))
3269  return MRI.getRegClass(Reg);
3270  return RI.getPhysRegClass(Reg);
3271  }
3272 
3273  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
3274  return RI.getRegClass(RCID);
3275 }
3276 
3277 bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
3278  switch (MI.getOpcode()) {
3279  case AMDGPU::COPY:
3280  case AMDGPU::REG_SEQUENCE:
3281  case AMDGPU::PHI:
3282  case AMDGPU::INSERT_SUBREG:
3283  return RI.hasVGPRs(getOpRegClass(MI, 0));
3284  default:
3285  return RI.hasVGPRs(getOpRegClass(MI, OpNo));
3286  }
3287 }
3288 
3289 void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
3290  MachineBasicBlock::iterator I = MI;
3291  MachineBasicBlock *MBB = MI.getParent();
3292  MachineOperand &MO = MI.getOperand(OpIdx);
3293  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
3294  unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass;
3295  const TargetRegisterClass *RC = RI.getRegClass(RCID);
3296  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
3297  if (MO.isReg())
3298  Opcode = AMDGPU::COPY;
3299  else if (RI.isSGPRClass(RC))
3300  Opcode = AMDGPU::S_MOV_B32;
3301 
3302  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
3303  if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
3304  VRC = &AMDGPU::VReg_64RegClass;
3305  else
3306  VRC = &AMDGPU::VGPR_32RegClass;
3307 
3308  unsigned Reg = MRI.createVirtualRegister(VRC);
3309  DebugLoc DL = MBB->findDebugLoc(I);
3310  BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO);
3311  MO.ChangeToRegister(Reg, false);
3312 }
3313 
3314 unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
3315  MachineRegisterInfo &MRI,
3316  MachineOperand &SuperReg,
3317  const TargetRegisterClass *SuperRC,
3318  unsigned SubIdx,
3319  const TargetRegisterClass *SubRC)
3320  const {
3321  MachineBasicBlock *MBB = MI->getParent();
3322  DebugLoc DL = MI->getDebugLoc();
3323  unsigned SubReg = MRI.createVirtualRegister(SubRC);
3324 
3325  if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
3326  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
3327  .addReg(SuperReg.getReg(), 0, SubIdx);
3328  return SubReg;
3329  }
3330 
3331  // Just in case the super register is itself a sub-register, copy it to a new
3332  // value so we don't need to worry about merging its subreg index with the
3333  // SubIdx passed to this function. The register coalescer should be able to
3334  // eliminate this extra copy.
3335  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
3336 
3337  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
3338  .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
3339 
3340  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
3341  .addReg(NewSuperReg, 0, SubIdx);
3342 
3343  return SubReg;
3344 }
3345 
3346 MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
3347  MachineBasicBlock::iterator MII,
3348  MachineRegisterInfo &MRI,
3349  MachineOperand &Op,
3350  const TargetRegisterClass *SuperRC,
3351  unsigned SubIdx,
3352  const TargetRegisterClass *SubRC) const {
3353  if (Op.isImm()) {
3354  if (SubIdx == AMDGPU::sub0)
3355  return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm()));
3356  if (SubIdx == AMDGPU::sub1)
3357  return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm() >> 32));
3358 
3359  llvm_unreachable("Unhandled register index for immediate");
3360  }
3361 
3362  unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
3363  SubIdx, SubRC);
3364  return MachineOperand::CreateReg(SubReg, false);
3365 }
3366 
3367 // Change the order of operands from (0, 1, 2) to (0, 2, 1)
3368 void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
3369  assert(Inst.getNumExplicitOperands() == 3);
3370  MachineOperand Op1 = Inst.getOperand(1);
3371  Inst.RemoveOperand(1);
3372  Inst.addOperand(Op1);
3373 }
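swapOperands works because RemoveOperand shifts the later operands down and addOperand appends at the end, so removing operand 1 and re-adding it turns (0, 1, 2) into (0, 2, 1). The same trick at the container level, as a standalone sketch:

#include <cassert>
#include <vector>

int main() {
  std::vector<int> Ops = {0, 1, 2};
  int Op1 = Ops[1];
  Ops.erase(Ops.begin() + 1);  // later operands shift down: {0, 2}
  Ops.push_back(Op1);          // re-adding appends at the end: {0, 2, 1}
  assert((Ops == std::vector<int>{0, 2, 1}));
}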
3374 
3375 bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
3376  const MCOperandInfo &OpInfo,
3377  const MachineOperand &MO) const {
3378  if (!MO.isReg())
3379  return false;
3380 
3381  unsigned Reg = MO.getReg();
3382  const TargetRegisterClass *RC =
3383  TargetRegisterInfo::isVirtualRegister(Reg) ?
3384  MRI.getRegClass(Reg) :
3385  RI.getPhysRegClass(Reg);
3386 
3387  const SIRegisterInfo *TRI =
3388  static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
3389  RC = TRI->getSubRegClass(RC, MO.getSubReg());
3390 
3391  // In order to be legal, the common sub-class must be equal to the
3392  // class of the current operand. For example:
3393  //
3394  // v_mov_b32 s0 ; Operand defined as vsrc_b32
3395  // ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL
3396  //
3397  // s_sendmsg 0, s0 ; Operand defined as m0reg
3398  // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
3399 
3400  return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
3401 }
3402 
3403 bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
3404  const MCOperandInfo &OpInfo,
3405  const MachineOperand &MO) const {
3406  if (MO.isReg())
3407  return isLegalRegOperand(MRI, OpInfo, MO);
3408 
3409  // Handle non-register types that are treated like immediates.
3410  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
3411  return true;
3412 }
3413 
3414 bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
3415  const MachineOperand *MO) const {
3416  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
3417  const MCInstrDesc &InstDesc = MI.getDesc();
3418  const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
3419  const TargetRegisterClass *DefinedRC =
3420  OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
3421  if (!MO)
3422  MO = &MI.getOperand(OpIdx);
3423 
3424  if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
3425 
3426  RegSubRegPair SGPRUsed;
3427  if (MO->isReg())
3428  SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg());
3429 
3430  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
3431  if (i == OpIdx)
3432  continue;
3433  const MachineOperand &Op = MI.getOperand(i);
3434  if (Op.isReg()) {
3435  if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
3436  usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) {
3437  return false;
3438  }
3439  } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
3440  return false;
3441  }
3442  }
3443  }
3444 
3445  if (MO->isReg()) {
3446  assert(DefinedRC);
3447  return isLegalRegOperand(MRI, OpInfo, *MO);
3448  }
3449 
3450  // Handle non-register types that are treated like immediates.
3451  assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
3452 
3453  if (!DefinedRC) {
3454  // This operand expects an immediate.
3455  return true;
3456  }
3457 
3458  return isImmOperandLegal(MI, OpIdx, *MO);
3459 }
3460 
3461 void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
3462  MachineInstr &MI) const {
3463  unsigned Opc = MI.getOpcode();
3464  const MCInstrDesc &InstrDesc = get(Opc);
3465 
3466  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
3467  MachineOperand &Src1 = MI.getOperand(Src1Idx);
3468 
3469  // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32
3470  // we need to only have one constant bus use.
3471  //
3472  // Note we do not need to worry about literal constants here. They are
3473  // disabled for the operand type for instructions because they will always
3474  // violate the one constant bus use rule.
3475  bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister;
3476  if (HasImplicitSGPR) {
3477  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3478  MachineOperand &Src0 = MI.getOperand(Src0Idx);
3479 
3480  if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
3481  legalizeOpWithMove(MI, Src0Idx);
3482  }
3483 
3484  // Special case: V_WRITELANE_B32 accepts only immediate or SGPR operands for
3485  // both the value to write (src0) and lane select (src1). Fix up non-SGPR
3486  // src0/src1 with V_READFIRSTLANE.
3487  if (Opc == AMDGPU::V_WRITELANE_B32) {
3488  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3489  MachineOperand &Src0 = MI.getOperand(Src0Idx);
3490  const DebugLoc &DL = MI.getDebugLoc();
3491  if (Src0.isReg() && RI.isVGPR(MRI, Src0.getReg())) {
3492  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
3493  BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
3494  .add(Src0);
3495  Src0.ChangeToRegister(Reg, false);
3496  }
3497  if (Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) {
3498  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
3499  const DebugLoc &DL = MI.getDebugLoc();
3500  BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
3501  .add(Src1);
3502  Src1.ChangeToRegister(Reg, false);
3503  }
3504  return;
3505  }
3506 
3507  // VOP2 src0 instructions support all operand types, so we don't need to check
3508  // their legality. If src1 is already legal, we don't need to do anything.
3509  if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
3510  return;
3511 
3512  // Special case: V_READLANE_B32 accepts only immediate or SGPR operands for
3513  // lane select. Fix up using V_READFIRSTLANE, since we assume that the lane
3514  // select is uniform.
3515  if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
3516  RI.isVGPR(MRI, Src1.getReg())) {
3517  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
3518  const DebugLoc &DL = MI.getDebugLoc();
3519  BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
3520  .add(Src1);
3521  Src1.ChangeToRegister(Reg, false);
3522  return;
3523  }
3524 
3525  // We do not use commuteInstruction here because it is too aggressive and will
3526  // commute if it is possible. We only want to commute here if it improves
3527  // legality. This can be called a fairly large number of times so don't waste
3528  // compile time pointlessly swapping and checking legality again.
3529  if (HasImplicitSGPR || !MI.isCommutable()) {
3530  legalizeOpWithMove(MI, Src1Idx);
3531  return;
3532  }
3533 
3534  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3535  MachineOperand &Src0 = MI.getOperand(Src0Idx);
3536 
3537  // If src0 can be used as src1, commuting will make the operands legal.
3538  // Otherwise we have to give up and insert a move.
3539  //
3540  // TODO: Other immediate-like operand kinds could be commuted if there was a
3541  // MachineOperand::ChangeTo* for them.
3542  if ((!Src1.isImm() && !Src1.isReg()) ||
3543  !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) {
3544  legalizeOpWithMove(MI, Src1Idx);
3545  return;
3546  }
3547 
3548  int CommutedOpc = commuteOpcode(MI);
3549  if (CommutedOpc == -1) {
3550  legalizeOpWithMove(MI, Src1Idx);
3551  return;
3552  }
3553 
3554  MI.setDesc(get(CommutedOpc));
3555 
3556  unsigned Src0Reg = Src0.getReg();
3557  unsigned Src0SubReg = Src0.getSubReg();
3558  bool Src0Kill = Src0.isKill();
3559 
3560  if (Src1.isImm())
3561  Src0.ChangeToImmediate(Src1.getImm());
3562  else if (Src1.isReg()) {
3563  Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill());
3564  Src0.setSubReg(Src1.getSubReg());
3565  } else
3566  llvm_unreachable("Should only have register or immediate operands");
3567 
3568  Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
3569  Src1.setSubReg(Src0SubReg);
3570 }
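// [Editor's note: illustrative sketch, not part of the original source.]
// The commute path above avoids an extra V_MOV_B32. For a hypothetical VOP2
// with an SGPR in src1, e.g.
// \code
//   %2:vgpr_32 = V_ADD_F32_e32 %0:vgpr_32, %1:sgpr_32, implicit $exec
// \endcode
// src1 may only be a VGPR, but src0 accepts any operand kind, so swapping the
// two operands (using the commuted opcode) produces a legal encoding:
// \code
//   %2:vgpr_32 = V_ADD_F32_e32 %1:sgpr_32, %0:vgpr_32, implicit $exec
// \endcode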
3571 
3572 // Legalize VOP3 operands. Because all operand types are supported for any
3573 // operand, and since literal constants are not allowed and should never be
3574 // seen, we only need to worry about inserting copies if we use multiple SGPR
3575 // operands.
3576 void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
3577  MachineInstr &MI) const {
3578  unsigned Opc = MI.getOpcode();
3579 
3580  int VOP3Idx[3] = {
3581  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
3582  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
3583  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
3584  };
3585 
3586  // Find the one SGPR operand we are allowed to use.
3587  unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
3588 
3589  for (unsigned i = 0; i < 3; ++i) {
3590  int Idx = VOP3Idx[i];
3591  if (Idx == -1)
3592  break;
3593  MachineOperand &MO = MI.getOperand(Idx);
3594 
3595  // We should never see a VOP3 instruction with an illegal immediate operand.
3596  if (!MO.isReg())
3597  continue;
3598 
3599  if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
3600  continue; // VGPRs are legal
3601 
3602  if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
3603  SGPRReg = MO.getReg();
3604  // We can use one SGPR in each VOP3 instruction.
3605  continue;
3606  }
3607 
3608  // If we make it this far, then the operand is not legal and we must
3609  // legalize it.
3610  legalizeOpWithMove(MI, Idx);
3611  }
3612 }
3613 
3614 unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
3615  MachineRegisterInfo &MRI) const {
3616  const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
3617  const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
3618  unsigned DstReg = MRI.createVirtualRegister(SRC);
3619  unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
3620 
3621  if (SubRegs == 1) {
3622  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
3623  get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
3624  .addReg(SrcReg);
3625  return DstReg;
3626  }
3627 
3628  SmallVector<unsigned, 8> SRegs;
3629  for (unsigned i = 0; i < SubRegs; ++i) {
3630  unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3631  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
3632  get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
3633  .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
3634  SRegs.push_back(SGPR);
3635  }
3636 
3637  MachineInstrBuilder MIB =
3638  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
3639  get(AMDGPU::REG_SEQUENCE), DstReg);
3640  for (unsigned i = 0; i < SubRegs; ++i) {
3641  MIB.addReg(SRegs[i]);
3642  MIB.addImm(RI.getSubRegFromChannel(i));
3643  }
3644  return DstReg;
3645 }
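// [Editor's note: illustrative sketch, not part of the original source.]
// For a 64-bit VGPR source, the helper above expands to roughly the following
// MIR, where %vptr is a hypothetical VGPR pair being made uniform:
// \code
//   %lo:sgpr_32   = V_READFIRSTLANE_B32 %vptr.sub0
//   %hi:sgpr_32   = V_READFIRSTLANE_B32 %vptr.sub1
//   %sptr:sreg_64 = REG_SEQUENCE %lo, %subreg.sub0, %hi, %subreg.sub1
// \endcode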
3646 
3647 void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
3648  MachineInstr &MI) const {
3649 
3650  // If the pointer is stored in VGPRs, then we need to move it to
3651  // SGPRs using v_readfirstlane. This is safe because we only select
3652  // loads with uniform pointers to SMRD instructions, so we know the
3653  // pointer value is uniform.
3654  MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase);
3655  if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
3656  unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
3657  SBase->setReg(SGPR);
3658  }
3659  MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soff);
3660  if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) {
3661  unsigned SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI);
3662  SOff->setReg(SGPR);
3663  }
3664 }
3665 
3666 void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
3667  MachineBasicBlock::iterator I,
3668  const TargetRegisterClass *DstRC,
3669  MachineOperand &Op,
3670  MachineRegisterInfo &MRI,
3671  const DebugLoc &DL) const {
3672  unsigned OpReg = Op.getReg();
3673  unsigned OpSubReg = Op.getSubReg();
3674 
3675  const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg(
3676  RI.getRegClassForReg(MRI, OpReg), OpSubReg);
3677 
3678  // Check if operand is already the correct register class.
3679  if (DstRC == OpRC)
3680  return;
3681 
3682  unsigned DstReg = MRI.createVirtualRegister(DstRC);
3683  MachineInstr *Copy =
3684  BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
3685 
3686  Op.setReg(DstReg);
3687  Op.setSubReg(0);
3688 
3689  MachineInstr *Def = MRI.getVRegDef(OpReg);
3690  if (!Def)
3691  return;
3692 
3693  // Try to eliminate the copy if it is copying an immediate value.
3694  if (Def->isMoveImmediate())
3695  FoldImmediate(*Copy, *Def, OpReg, &MRI);
3696 }
3697 
3698 // Emit the actual waterfall loop, executing the wrapped instruction for each
3699 // unique value of \p Rsrc across all lanes. In the best case we execute 1
3700 // iteration, in the worst case we execute 64 (once per lane).
3701 static void
3702 emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
3703  MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB,
3704  const DebugLoc &DL, MachineOperand &Rsrc) {
3705  MachineBasicBlock::iterator I = LoopBB.begin();
3706 
3707  unsigned VRsrc = Rsrc.getReg();
3708  unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef());
3709 
3710  unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
3711  unsigned CondReg0 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
3712  unsigned CondReg1 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
3713  unsigned AndCond = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
3714  unsigned SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3715  unsigned SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3716  unsigned SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3717  unsigned SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3718  unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
3719 
3720  // Beginning of the loop, read the next Rsrc variant.
3721  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub0)
3722  .addReg(VRsrc, VRsrcUndef, AMDGPU::sub0);
3723  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub1)
3724  .addReg(VRsrc, VRsrcUndef, AMDGPU::sub1);
3725  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub2)
3726  .addReg(VRsrc, VRsrcUndef, AMDGPU::sub2);
3727  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub3)
3728  .addReg(VRsrc, VRsrcUndef, AMDGPU::sub3);
3729 
3730  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SRsrc)
3731  .addReg(SRsrcSub0)
3732  .addImm(AMDGPU::sub0)
3733  .addReg(SRsrcSub1)
3734  .addImm(AMDGPU::sub1)
3735  .addReg(SRsrcSub2)
3736  .addImm(AMDGPU::sub2)
3737  .addReg(SRsrcSub3)
3738  .addImm(AMDGPU::sub3);
3739 
3740  // Update Rsrc operand to use the SGPR Rsrc.
3741  Rsrc.setReg(SRsrc);
3742  Rsrc.setIsKill(true);
3743 
3744  // Identify all lanes with identical Rsrc operands in their VGPRs.
3745  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg0)
3746  .addReg(SRsrc, 0, AMDGPU::sub0_sub1)
3747  .addReg(VRsrc, 0, AMDGPU::sub0_sub1);
3748  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg1)
3749  .addReg(SRsrc, 0, AMDGPU::sub2_sub3)
3750  .addReg(VRsrc, 0, AMDGPU::sub2_sub3);
3751  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_B64), AndCond)
3752  .addReg(CondReg0)
3753  .addReg(CondReg1);
3754 
3755  MRI.setSimpleHint(SaveExec, AndCond);
3756 
3757  // Update EXEC to matching lanes, saving original to SaveExec.
3758  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_SAVEEXEC_B64), SaveExec)
3759  .addReg(AndCond, RegState::Kill);
3760 
3761  // The original instruction is here; we insert the terminators after it.
3762  I = LoopBB.end();
3763 
3764  // Update EXEC, switch all done bits to 0 and all todo bits to 1.
3765  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
3766  .addReg(AMDGPU::EXEC)
3767  .addReg(SaveExec);
3768  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&LoopBB);
3769 }
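// [Editor's note: illustrative sketch, not part of the original source.]
// The loop body emitted above has roughly this shape, with %vrsrc the
// original VGPR descriptor and %srsrc its readfirstlane'd SGPR copy:
// \code
// loop:
//   %srsrc = readfirstlane of each of the four dwords of %vrsrc
//   %cond  = (%srsrc[63:0] == %vrsrc[63:0]) & (%srsrc[127:64] == %vrsrc[127:64])
//   exec, %save = S_AND_SAVEEXEC_B64 %cond    ; keep only the matching lanes
//   ... the wrapped instruction executes here, using %srsrc ...
//   exec = S_XOR_B64 exec, %save              ; retire the lanes just handled
//   S_CBRANCH_EXECNZ loop
// \endcode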
3770 
3771 // Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register
3772 // with SGPRs by iterating over all unique values across all lanes.
3773 static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
3774  MachineOperand &Rsrc, MachineDominatorTree *MDT) {
3775  MachineBasicBlock &MBB = *MI.getParent();
3776  MachineFunction &MF = *MBB.getParent();
3777  MachineRegisterInfo &MRI = MF.getRegInfo();
3778  MachineBasicBlock::iterator I(&MI);
3779  const DebugLoc &DL = MI.getDebugLoc();
3780 
3781  unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
3782 
3783  // Save the EXEC mask
3784  BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B64), SaveExec)
3785  .addReg(AMDGPU::EXEC);
3786 
3787  // Killed uses in the instruction we are waterfalling around will be
3788  // incorrect due to the added control-flow.
3789  for (auto &MO : MI.uses()) {
3790  if (MO.isReg() && MO.isUse()) {
3791  MRI.clearKillFlags(MO.getReg());
3792  }
3793  }
3794 
3795  // To insert the loop we need to split the block. Move everything after this
3796  // point to a new block, and insert a new empty block between the two.
3797  MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock();
3798  MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock();
3799  MachineFunction::iterator MBBI(MBB);
3800  ++MBBI;
3801 
3802  MF.insert(MBBI, LoopBB);
3803  MF.insert(MBBI, RemainderBB);
3804 
3805  LoopBB->addSuccessor(LoopBB);
3806  LoopBB->addSuccessor(RemainderBB);
3807 
3808  // Move MI to the LoopBB, and the remainder of the block to RemainderBB.
3809  MachineBasicBlock::iterator J = I++;
3810  RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
3811  RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
3812  LoopBB->splice(LoopBB->begin(), &MBB, J);
3813 
3814  MBB.addSuccessor(LoopBB);
3815 
3816  // Update dominators. We know that MBB immediately dominates LoopBB, that
3817  // LoopBB immediately dominates RemainderBB, and that RemainderBB immediately
3818  // dominates all of the successors transferred to it from MBB that MBB used
3819  // to dominate.
3820  if (MDT) {
3821  MDT->addNewBlock(LoopBB, &MBB);
3822  MDT->addNewBlock(RemainderBB, LoopBB);
3823  for (auto &Succ : RemainderBB->successors()) {
3824  if (MDT->dominates(&MBB, Succ)) {
3825  MDT->changeImmediateDominator(Succ, RemainderBB);
3826  }
3827  }
3828  }
3829 
3830  emitLoadSRsrcFromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, Rsrc);
3831 
3832  // Restore the EXEC mask
3833  MachineBasicBlock::iterator First = RemainderBB->begin();
3834  BuildMI(*RemainderBB, First, DL, TII.get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
3835  .addReg(SaveExec);
3836 }
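// [Editor's note: illustrative sketch, not part of the original source.]
// After the block split above the control flow looks like
// \code
//   MBB --> LoopBB --> RemainderBB --> (original successors of MBB)
//            ^___|   (back edge taken while EXEC is still non-zero)
// \endcode
// with the EXEC mask saved in MBB and restored at the top of RemainderBB.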
3837 
3838 // Extract pointer from Rsrc and return a zero-value Rsrc replacement.
3839 static std::tuple<unsigned, unsigned>
3840 extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) {
3841  MachineBasicBlock &MBB = *MI.getParent();
3842  MachineFunction &MF = *MBB.getParent();
3843  MachineRegisterInfo &MRI = MF.getRegInfo();
3844 
3845  // Extract the ptr from the resource descriptor.
3846  unsigned RsrcPtr =
3847  TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
3848  AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
3849 
3850  // Create an empty resource descriptor
3851  unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
3852  unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3853  unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3854  unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
3855  uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
3856 
3857  // Zero64 = 0
3858  BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B64), Zero64)
3859  .addImm(0);
3860 
3861  // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
3862  BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatLo)
3863  .addImm(RsrcDataFormat & 0xFFFFFFFF);
3864 
3865  // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
3866  BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatHi)
3867  .addImm(RsrcDataFormat >> 32);
3868 
3869  // NewSRsrc = {Zero64, SRsrcFormat}
3870  BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::REG_SEQUENCE), NewSRsrc)
3871  .addReg(Zero64)
3872  .addImm(AMDGPU::sub0_sub1)
3873  .addReg(SRsrcFormatLo)
3874  .addImm(AMDGPU::sub2)
3875  .addReg(SRsrcFormatHi)
3876  .addImm(AMDGPU::sub3);
3877 
3878  return std::make_tuple(RsrcPtr, NewSRsrc);
3879 }
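// [Editor's note: descriptive summary, not part of the original source.]
// The replacement descriptor built above has the layout { base = 0 (dwords
// 0-1), dword 2 = RSRC_DATA_FORMAT[31:0], dword 3 = RSRC_DATA_FORMAT[63:32] },
// i.e. a null base pointer with the default data format; the real base
// pointer is returned separately so the caller can fold it into VAddr.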
3880 
3881 void SIInstrInfo::legalizeOperands(MachineInstr &MI,
3882  MachineDominatorTree *MDT) const {
3883  MachineFunction &MF = *MI.getParent()->getParent();
3884  MachineRegisterInfo &MRI = MF.getRegInfo();
3885 
3886  // Legalize VOP2
3887  if (isVOP2(MI) || isVOPC(MI)) {
3888  legalizeOperandsVOP2(MRI, MI);
3889  return;
3890  }
3891 
3892  // Legalize VOP3
3893  if (isVOP3(MI)) {
3894  legalizeOperandsVOP3(MRI, MI);
3895  return;
3896  }
3897 
3898  // Legalize SMRD
3899  if (isSMRD(MI)) {
3900  legalizeOperandsSMRD(MRI, MI);
3901  return;
3902  }
3903 
3904  // Legalize REG_SEQUENCE and PHI
3905  // The register class of the operands must be the same type as the register
3906  // class of the output.
3907  if (MI.getOpcode() == AMDGPU::PHI) {
3908  const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
3909  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
3910  if (!MI.getOperand(i).isReg() ||
3911  !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
3912  continue;
3913  const TargetRegisterClass *OpRC =
3914  MRI.getRegClass(MI.getOperand(i).getReg());
3915  if (RI.hasVGPRs(OpRC)) {
3916  VRC = OpRC;
3917  } else {
3918  SRC = OpRC;
3919  }
3920  }
3921 
3922  // If any of the operands are VGPR registers, then they must all be VGPRs;
3923  // otherwise we will create illegal VGPR->SGPR copies when legalizing
3924  // them.
3925  if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
3926  if (!VRC) {
3927  assert(SRC);
3928  VRC = RI.getEquivalentVGPRClass(SRC);
3929  }
3930  RC = VRC;
3931  } else {
3932  RC = SRC;
3933  }
3934 
3935  // Update all the operands so they have the same type.
3936  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3937  MachineOperand &Op = MI.getOperand(I);
3938  if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
3939  continue;
3940 
3941  // MI is a PHI instruction.
3942  MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB();
3943  MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
3944 
3945  // Avoid creating no-op copies with the same src and dst reg class. These
3946  // confuse some of the machine passes.
3947  legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc());
3948  }
3949  }
3950 
3951  // REG_SEQUENCE doesn't really require operand legalization, but if one has a
3952  // VGPR dest type and SGPR sources, insert copies so all operands are
3953  // VGPRs. This seems to help operand folding / the register coalescer.
3954  if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
3955  MachineBasicBlock *MBB = MI.getParent();
3956  const TargetRegisterClass *DstRC = getOpRegClass(MI, 0);
3957  if (RI.hasVGPRs(DstRC)) {
3958  // Update all the operands so they are VGPR register classes. These may
3959  // not be the same register class because REG_SEQUENCE supports mixing
3960  // subregister index types e.g. sub0_sub1 + sub2 + sub3
3961  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3962  MachineOperand &Op = MI.getOperand(I);
3963  if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
3964  continue;
3965 
3966  const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
3967  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
3968  if (VRC == OpRC)
3969  continue;
3970 
3971  legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc());
3972  Op.setIsKill();
3973  }
3974  }
3975 
3976  return;
3977  }
3978 
3979  // Legalize INSERT_SUBREG
3980  // src0 must have the same register class as dst
3981  if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
3982  unsigned Dst = MI.getOperand(0).getReg();
3983  unsigned Src0 = MI.getOperand(1).getReg();
3984  const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
3985  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
3986  if (DstRC != Src0RC) {
3987  MachineBasicBlock *MBB = MI.getParent();
3988  MachineOperand &Op = MI.getOperand(1);
3989  legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc());
3990  }
3991  return;
3992  }
3993 
3994  // Legalize SI_INIT_M0
3995  if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
3996  MachineOperand &Src = MI.getOperand(0);
3997  if (Src.isReg() && RI.hasVGPRs(MRI.getRegClass(Src.getReg())))
3998  Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI));
3999  return;
4000  }
4001 
4002  // Legalize MIMG and MUBUF/MTBUF for shaders.
4003  //
4004  // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
4005  // scratch memory access. In both cases, the legalization never involves
4006  // conversion to the addr64 form.
4007  if (isMIMG(MI) ||
4008  (AMDGPU::isShader(MF.getFunction().getCallingConv()) &&
4009  (isMUBUF(MI) || isMTBUF(MI)))) {
4010  MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
4011  if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
4012  unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
4013  SRsrc->setReg(SGPR);
4014  }
4015 
4016  MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp);
4017  if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
4018  unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
4019  SSamp->setReg(SGPR);
4020  }
4021  return;
4022  }
4023 
4024  // Legalize MUBUF* instructions.
4025  int RsrcIdx =
4026  AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
4027  if (RsrcIdx != -1) {
4028  // We have an MUBUF instruction
4029  MachineOperand *Rsrc = &MI.getOperand(RsrcIdx);
4030  unsigned RsrcRC = get(MI.getOpcode()).OpInfo[RsrcIdx].RegClass;
4031  if (RI.getCommonSubClass(MRI.getRegClass(Rsrc->getReg()),
4032  RI.getRegClass(RsrcRC))) {
4033  // The operands are legal.
4034  // FIXME: We may need to legalize operands besides srsrc.
4035  return;
4036  }
4037 
4038  // Legalize a VGPR Rsrc.
4039  //
4040  // If the instruction is _ADDR64, we can avoid a waterfall by extracting
4041  // the base pointer from the VGPR Rsrc, adding it to the VAddr, then using
4042  // a zero-value SRsrc.
4043  //
4044  // If the instruction is _OFFSET (both idxen and offen disabled), and we
4045  // support ADDR64 instructions, we can convert to ADDR64 and do the same as
4046  // above.
4047  //
4048  // Otherwise we are on non-ADDR64 hardware, and/or we have
4049  // idxen/offen/bothen and we fall back to a waterfall loop.
4050 
4051  MachineBasicBlock &MBB = *MI.getParent();
4052 
4053  MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
4054  if (VAddr && AMDGPU::getIfAddr64Inst(MI.getOpcode()) != -1) {
4055  // This is already an ADDR64 instruction so we need to add the pointer
4056  // extracted from the resource descriptor to the current value of VAddr.
4057  unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4058  unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4059  unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
4060 
4061  unsigned RsrcPtr, NewSRsrc;
4062  std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
4063 
4064  // NewVaddrLo = RsrcPtr:sub0 + VAddr:sub0
4065  DebugLoc DL = MI.getDebugLoc();
4066  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
4067  .addReg(RsrcPtr, 0, AMDGPU::sub0)
4068  .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
4069 
4070  // NewVaddrHi = RsrcPtr:sub1 + VAddr:sub1
4071  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
4072  .addReg(RsrcPtr, 0, AMDGPU::sub1)
4073  .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
4074 
4075  // NewVaddr = {NewVaddrHi, NewVaddrLo}
4076  BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
4077  .addReg(NewVAddrLo)
4078  .addImm(AMDGPU::sub0)
4079  .addReg(NewVAddrHi)
4080  .addImm(AMDGPU::sub1);
4081 
4082  VAddr->setReg(NewVAddr);
4083  Rsrc->setReg(NewSRsrc);
4084  } else if (!VAddr && ST.hasAddr64()) {
4085  // This instruction is the _OFFSET variant, so we need to convert it to
4086  // ADDR64.
4087  assert(MBB.getParent()->getSubtarget<GCNSubtarget>().getGeneration()
4089  "FIXME: Need to emit flat atomics here");
4090 
4091  unsigned RsrcPtr, NewSRsrc;
4092  std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
4093 
4094  unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
4095  MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
4096  MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
4097  MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
4098  unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode());
4099 
4100  // Atomics with return have an additional tied operand and are
4101  // missing some of the special bits.
4102  MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in);
4103  MachineInstr *Addr64;
4104 
4105  if (!VDataIn) {
4106  // Regular buffer load / store.
4107  MachineInstrBuilder MIB =
4108  BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
4109  .add(*VData)
4110  .addReg(NewVAddr)
4111  .addReg(NewSRsrc)
4112  .add(*SOffset)
4113  .add(*Offset);
4114 
4115  // Atomics do not have this operand.
4116  if (const MachineOperand *GLC =
4117  getNamedOperand(MI, AMDGPU::OpName::glc)) {
4118  MIB.addImm(GLC->getImm());
4119  }
4120 
4121  MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));
4122 
4123  if (const MachineOperand *TFE =
4124  getNamedOperand(MI, AMDGPU::OpName::tfe)) {
4125  MIB.addImm(TFE->getImm());
4126  }
4127 
4128  MIB.cloneMemRefs(MI);
4129  Addr64 = MIB;
4130  } else {
4131  // Atomics with return.
4132  Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
4133  .add(*VData)
4134  .add(*VDataIn)
4135  .addReg(NewVAddr)
4136  .addReg(NewSRsrc)
4137  .add(*SOffset)
4138  .add(*Offset)
4139  .addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc))
4140  .cloneMemRefs(MI);
4141  }
4142 
4143  MI.removeFromParent();
4144 
4145  // NewVaddr = {NewVaddrHi, NewVaddrLo}
4146  BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
4147  NewVAddr)
4148  .addReg(RsrcPtr, 0, AMDGPU::sub0)
4149  .addImm(AMDGPU::sub0)
4150  .addReg(RsrcPtr, 0, AMDGPU::sub1)
4151  .addImm(AMDGPU::sub1);
4152  } else {
4153  // This is another variant; legalize Rsrc with waterfall loop from VGPRs
4154  // to SGPRs.
4155  loadSRsrcFromVGPR(*this, MI, *Rsrc, MDT);
4156  }
4157  }
4158 }
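// [Editor's note: illustrative usage, not part of the original source.]
// A caller that has just produced or rewritten a VALU instruction would
// typically hand it to the entry point above; here TII and MDT are
// hypothetical pointers to this SIInstrInfo and to an optional
// MachineDominatorTree.
// \code
//   TII->legalizeOperands(MI, MDT);   // MDT may be nullptr
// \endcode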
4159 
4160 void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
4161  MachineDominatorTree *MDT) const {
4162  SetVectorType Worklist;
4163  Worklist.insert(&TopInst);
4164 
4165  while (!Worklist.empty()) {
4166  MachineInstr &Inst = *Worklist.pop_back_val();
4167  MachineBasicBlock *MBB = Inst.getParent();
4168  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
4169 
4170  unsigned Opcode = Inst.getOpcode();
4171  unsigned NewOpcode = getVALUOp(Inst);
4172 
4173  // Handle some special cases
4174  switch (Opcode) {
4175  default:
4176  break;
4177  case AMDGPU::S_ADD_U64_PSEUDO:
4178  case AMDGPU::S_SUB_U64_PSEUDO:
4179  splitScalar64BitAddSub(Worklist, Inst, MDT);
4180  Inst.eraseFromParent();
4181  continue;
4182  case AMDGPU::S_ADD_I32:
4183  case AMDGPU::S_SUB_I32:
4184  // FIXME: The u32 versions currently selected use the carry.
4185  if (moveScalarAddSub(Worklist, Inst, MDT))
4186  continue;
4187 
4188  // Default handling
4189  break;
4190  case AMDGPU::S_AND_B64:
4191  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
4192  Inst.eraseFromParent();
4193  continue;
4194 
4195  case AMDGPU::S_OR_B64:
4196  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
4197  Inst.eraseFromParent();
4198  continue;
4199 
4200  case AMDGPU::S_XOR_B64:
4201  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
4202  Inst.eraseFromParent();
4203  continue;
4204 
4205  case AMDGPU::S_NAND_B64:
4206  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
4207  Inst.eraseFromParent();
4208  continue;
4209 
4210  case AMDGPU::S_NOR_B64:
4211  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
4212  Inst.eraseFromParent();
4213  continue;
4214 
4215  case AMDGPU::S_XNOR_B64:
4216  if (ST.hasDLInsts())
4217  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
4218  else
4219  splitScalar64BitXnor(Worklist, Inst, MDT);
4220  Inst.eraseFromParent();
4221  continue;
4222 
4223  case AMDGPU::S_ANDN2_B64:
4224  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
4225  Inst.eraseFromParent();
4226  continue;
4227 
4228  case AMDGPU::S_ORN2_B64:
4229  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
4230  Inst.eraseFromParent();
4231  continue;
4232 
4233  case AMDGPU::S_NOT_B64:
4234  splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
4235  Inst.eraseFromParent();
4236  continue;
4237 
4238  case AMDGPU::S_BCNT1_I32_B64:
4239  splitScalar64BitBCNT(Worklist, Inst);
4240  Inst.eraseFromParent();
4241  continue;
4242 
4243  case AMDGPU::S_BFE_I64:
4244  splitScalar64BitBFE(Worklist, Inst);
4245  Inst.eraseFromParent();
4246  continue;
4247 
4248  case AMDGPU::S_LSHL_B32:
4249  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
4250  NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
4251  swapOperands(Inst);
4252  }
4253  break;
4254  case AMDGPU::S_ASHR_I32:
4255  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
4256  NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
4257  swapOperands(Inst);
4258  }
4259  break;
4260  case AMDGPU::S_LSHR_B32:
4261  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
4262  NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
4263  swapOperands(Inst);
4264  }
4265  break;
4266  case AMDGPU::S_LSHL_B64:
4267  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
4268  NewOpcode = AMDGPU::V_LSHLREV_B64;
4269  swapOperands(Inst);
4270  }
4271  break;
4272  case AMDGPU::S_ASHR_I64:
4273  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
4274  NewOpcode = AMDGPU::V_ASHRREV_I64;
4275  swapOperands(Inst);
4276  }
4277  break;
4278  case AMDGPU::S_LSHR_B64:
4279  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
4280  NewOpcode = AMDGPU::V_LSHRREV_B64;
4281  swapOperands(Inst);
4282  }
4283  break;
4284 
4285  case AMDGPU::S_ABS_I32:
4286  lowerScalarAbs(Worklist, Inst);
4287  Inst.eraseFromParent();
4288  continue;
4289 
4290  case AMDGPU::S_CBRANCH_SCC0:
4291  case AMDGPU::S_CBRANCH_SCC1:
4292  // Clear unused bits of vcc
4293  BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64),
4294  AMDGPU::VCC)
4295  .addReg(AMDGPU::EXEC)
4296  .addReg(AMDGPU::VCC);
4297  break;
4298 
4299  case AMDGPU::S_BFE_U64:
4300  case AMDGPU::S_BFM_B64:
4301  llvm_unreachable("Moving this op to VALU not implemented");
4302 
4303  case AMDGPU::S_PACK_LL_B32_B16:
4304  case AMDGPU::S_PACK_LH_B32_B16:
4305  case AMDGPU::S_PACK_HH_B32_B16:
4306  movePackToVALU(Worklist, MRI, Inst);
4307  Inst.eraseFromParent();
4308  continue;
4309 
4310  case AMDGPU::S_XNOR_B32:
4311  lowerScalarXnor(Worklist, Inst);
4312  Inst.eraseFromParent();
4313  continue;
4314 
4315  case AMDGPU::S_NAND_B32:
4316  splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
4317  Inst.eraseFromParent();
4318  continue;
4319 
4320  case AMDGPU::S_NOR_B32:
4321  splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
4322  Inst.eraseFromParent();
4323  continue;
4324 
4325  case AMDGPU::S_ANDN2_B32:
4326  splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
4327  Inst.eraseFromParent();
4328  continue;
4329 
4330  case AMDGPU::S_ORN2_B32:
4331  splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
4332  Inst.eraseFromParent();
4333  continue;
4334  }
4335 
4336  if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
4337  // We cannot move this instruction to the VALU, so we should try to
4338  // legalize its operands instead.
4339  legalizeOperands(Inst, MDT);
4340  continue;
4341  }
4342 
4343  // Use the new VALU Opcode.
4344  const MCInstrDesc &NewDesc = get(NewOpcode);
4345  Inst.setDesc(NewDesc);
4346 
4347  // Remove any references to SCC. Vector instructions can't read from it, and
4348  // we're just about to add the implicit use / defs of VCC, and we don't want
4349  // both.
4350  for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) {
4351  MachineOperand &Op = Inst.getOperand(i);
4352  if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
4353  Inst.RemoveOperand(i);
4354  addSCCDefUsersToVALUWorklist(Inst, Worklist);
4355  }
4356  }
4357 
4358  if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
4359  // We are converting these to a BFE, so we need to add the missing
4360  // operands for the size and offset.
4361  unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
4362  Inst.addOperand(MachineOperand::CreateImm(0));
4363  Inst.addOperand(MachineOperand::CreateImm(Size));
4364 
4365  } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
4366  // The VALU version adds the second operand to the result, so insert an
4367  // extra 0 operand.
4368  Inst.addOperand(MachineOperand::CreateImm(0));
4369  }
4370 
4371  Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent());
4372 
4373  if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
4374  const MachineOperand &OffsetWidthOp = Inst.getOperand(2);
4375  // If we need to move this to VGPRs, we need to unpack the second operand
4376  // back into the 2 separate ones for bit offset and width.
4377  assert(OffsetWidthOp.isImm() &&
4378  "Scalar BFE is only implemented for constant width and offset");
4379  uint32_t Imm = OffsetWidthOp.getImm();
4380 
4381  uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
4382  uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
4383  Inst.RemoveOperand(2); // Remove old immediate.
4384  Inst.addOperand(MachineOperand::CreateImm(Offset));
4385  Inst.addOperand(MachineOperand::CreateImm(BitWidth));
4386  }
4387 
4388  bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef();
4389  unsigned NewDstReg = AMDGPU::NoRegister;
4390  if (HasDst) {
4391  unsigned DstReg = Inst.getOperand(0).getReg();
4392  if (TargetRegisterInfo::isPhysicalRegister(DstReg))
4393  continue;
4394 
4395  // Update the destination register class.
4396  const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
4397  if (!NewDstRC)
4398  continue;
4399 
4400  if (Inst.isCopy() &&
4401  TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
4402  NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
4403  // Instead of creating a copy where src and dst are the same register
4404  // class, we just replace all uses of dst with src. These kinds of
4405  // copies interfere with the heuristics MachineSink uses to decide
4406  // whether or not to split a critical edge, since the pass assumes
4407  // that copies will end up as machine instructions and not be
4408  // eliminated.
4409  addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
4410  MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg());
4411  MRI.clearKillFlags(Inst.getOperand(1).getReg());
4412  Inst.getOperand(0).setReg(DstReg);
4413 
4414  // Make sure we don't leave around a dead VGPR->SGPR copy. Normally
4415  // these are deleted later, but at -O0 it would leave a suspicious
4416  // looking illegal copy of an undef register.
4417  for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I)
4418  Inst.RemoveOperand(I);
4419  Inst.setDesc(get(AMDGPU::IMPLICIT_DEF));
4420  continue;
4421  }
4422 
4423  NewDstReg = MRI.createVirtualRegister(NewDstRC);
4424  MRI.replaceRegWith(DstReg, NewDstReg);
4425  }
4426 
4427  // Legalize the operands
4428  legalizeOperands(Inst, MDT);
4429 
4430  if (HasDst)
4431  addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
4432  }
4433 }
4434 
4435 // Add/sub require special handling to deal with carry outs.
4436 bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
4437  MachineDominatorTree *MDT) const {
4438  if (ST.hasAddNoCarry()) {
4439  // Assume there is no user of scc since we don't select this in that case.
4440  // Since scc isn't used, it doesn't really matter if the i32 or u32 variant
4441  // is used.
4442 
4443  MachineBasicBlock &MBB = *Inst.getParent();
4444  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4445 
4446  unsigned OldDstReg = Inst.getOperand(0).getReg();
4447  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4448 
4449  unsigned Opc = Inst.getOpcode();
4450  assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
4451 
4452  unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
4453  AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
4454 
4455  assert(Inst.getOperand(3).getReg() == AMDGPU::SCC);
4456  Inst.RemoveOperand(3);
4457 
4458  Inst.setDesc(get(NewOpc));
4459  Inst.addImplicitDefUseOperands(*MBB.getParent());
4460  MRI.replaceRegWith(OldDstReg, ResultReg);
4461  legalizeOperands(Inst, MDT);
4462 
4463  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4464  return true;
4465  }
4466 
4467  return false;
4468 }
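// [Editor's note: illustrative sketch, not part of the original source.]
// On subtargets with hasAddNoCarry() the rewrite above turns, for example,
// \code
//   %2:sgpr_32 = S_ADD_I32 %0, %1, implicit-def dead $scc
// \endcode
// into a carry-less VALU add whose result lives in a fresh VGPR:
// \code
//   %3:vgpr_32 = V_ADD_U32_e64 %0, %1, 0, implicit $exec
// \endcode
// after which all uses of %2 are rewritten to %3 and re-queued on the
// worklist.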
4469 
4470 void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
4471  MachineInstr &Inst) const {
4472  MachineBasicBlock &MBB = *Inst.getParent();
4473  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4474  MachineBasicBlock::iterator MII = Inst;
4475  DebugLoc DL = Inst.getDebugLoc();
4476 
4477  MachineOperand &Dest = Inst.getOperand(0);
4478  MachineOperand &Src = Inst.getOperand(1);
4479  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4480  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4481 
4482  unsigned SubOp = ST.hasAddNoCarry() ?
4483  AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_I32_e32;
4484 
4485  BuildMI(MBB, MII, DL, get(SubOp), TmpReg)
4486  .addImm(0)
4487  .addReg(Src.getReg());
4488 
4489  BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
4490  .addReg(Src.getReg())
4491  .addReg(TmpReg);
4492 
4493  MRI.replaceRegWith(Dest.getReg(), ResultReg);
4494  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4495 }
4496 
4497 void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
4498  MachineInstr &Inst) const {
4499  MachineBasicBlock &MBB = *Inst.getParent();
4500  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4501  MachineBasicBlock::iterator MII = Inst;
4502  const DebugLoc &DL = Inst.getDebugLoc();
4503 
4504  MachineOperand &Dest = Inst.getOperand(0);
4505  MachineOperand &Src0 = Inst.getOperand(1);
4506  MachineOperand &Src1 = Inst.getOperand(2);
4507 
4508  if (ST.hasDLInsts()) {
4509  unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4510  legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL);
4511  legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
4512 
4513  BuildMI(MBB, MII, DL, get(AMDGPU::V_XNOR_B32_e64), NewDest)
4514  .add(Src0)
4515  .add(Src1);
4516 
4517  MRI.replaceRegWith(Dest.getReg(), NewDest);
4518  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
4519  } else {
4520  // Using the identity !(x ^ y) == (!x ^ y) == (x ^ !y), we can
4521  // invert either source and then perform the XOR. If either source is a
4522  // scalar register, then we can leave the inversion on the scalar unit to
4523  // achieve a better distribution of scalar and vector instructions.
4524  bool Src0IsSGPR = Src0.isReg() &&
4525  RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
4526  bool Src1IsSGPR = Src1.isReg() &&
4527  RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
4528  MachineInstr *Not = nullptr;
4529  MachineInstr *Xor = nullptr;
4530  unsigned Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
4531  unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
4532 
4533  // Build a pair of scalar instructions and add them to the work list.
4534  // The next iteration over the work list will lower these to the vector
4535  // unit as necessary.
4536  if (Src0IsSGPR) {
4537  Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
4538  .add(Src0);
4539  Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
4540  .addReg(Temp)
4541  .add(Src1);
4542  } else if (Src1IsSGPR) {
4543  Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
4544  .add(Src1);
4545  Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
4546  .add(Src0)
4547  .addReg(Temp);
4548  } else {
4549  Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), Temp)
4550  .add(Src0)
4551  .add(Src1);
4552  Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
4553  .addReg(Temp);
4554  Worklist.insert(Not);
4555  }
4556 
4557  MRI.replaceRegWith(Dest.getReg(), NewDest);
4558 
4559  Worklist.insert(Xor);
4560 
4561  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
4562  }
4563 }
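// [Editor's note: worked example, not part of the original source.]
// A quick check of the identity used above, !(x ^ y) == (!x) ^ y == x ^ (!y):
// for x = 0b1100 and y = 0b1010 (4-bit view), x ^ y = 0b0110 so
// !(x ^ y) = 0b1001, and (!x) ^ y = 0b0011 ^ 0b1010 = 0b1001 as well.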
4564 
4565 void SIInstrInfo::splitScalarNotBinop(SetVectorType &Worklist,
4566  MachineInstr &Inst,
4567  unsigned Opcode) const {
4568  MachineBasicBlock &MBB = *Inst.getParent();
4569  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4570  MachineBasicBlock::iterator MII = Inst;
4571  const DebugLoc &DL = Inst.getDebugLoc();
4572 
4573  MachineOperand &Dest = Inst.getOperand(0);
4574  MachineOperand &Src0 = Inst.getOperand(1);
4575  MachineOperand &Src1 = Inst.getOperand(2);
4576 
4577  unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
4578  unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
4579 
4580  MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), Interm)
4581  .add(Src0)
4582  .add(Src1);
4583 
4584  MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
4585  .addReg(Interm);
4586 
4587  Worklist.insert(&Op);
4588  Worklist.insert(&Not);
4589 
4590  MRI.replaceRegWith(Dest.getReg(), NewDest);
4591  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
4592 }
4593 
4594 void SIInstrInfo::splitScalarBinOpN2(SetVectorType& Worklist,
4595  MachineInstr &Inst,
4596  unsigned Opcode) const {
4597  MachineBasicBlock &MBB = *Inst.getParent();
4598  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4599  MachineBasicBlock::iterator MII = Inst;
4600  const DebugLoc &DL = Inst.getDebugLoc();
4601 
4602  MachineOperand &Dest = Inst.getOperand(0);
4603  MachineOperand &Src0 = Inst.getOperand(1);
4604  MachineOperand &Src1 = Inst.getOperand(2);
4605 
4606  unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
4607  unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
4608 
4609  MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm)
4610  .add(Src1);
4611 
4612  MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), NewDest)
4613  .add(Src0)
4614  .addReg(Interm);
4615 
4616  Worklist.insert(&Not);
4617  Worklist.insert(&Op);
4618 
4619  MRI.replaceRegWith(Dest.getReg(), NewDest);
4620  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
4621 }
4622 
4623 void SIInstrInfo::splitScalar64BitUnaryOp(
4624  SetVectorType &Worklist, MachineInstr &Inst,
4625  unsigned Opcode) const {
4626  MachineBasicBlock &MBB = *Inst.getParent();
4627  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4628 
4629  MachineOperand &Dest = Inst.getOperand(0);
4630  MachineOperand &Src0 = Inst.getOperand(1);
4631  DebugLoc DL = Inst.getDebugLoc();
4632 
4633  MachineBasicBlock::iterator MII = Inst;
4634 
4635  const MCInstrDesc &InstDesc = get(Opcode);
4636  const TargetRegisterClass *Src0RC = Src0.isReg() ?
4637  MRI.getRegClass(Src0.getReg()) :
4638  &AMDGPU::SGPR_32RegClass;
4639 
4640  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
4641 
4642  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4643  AMDGPU::sub0, Src0SubRC);
4644 
4645  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
4646  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
4647  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
4648 
4649  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
4650  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
4651 
4652  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4653  AMDGPU::sub1, Src0SubRC);
4654 
4655  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
4656  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
4657 
4658  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
4659  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
4660  .addReg(DestSub0)
4661  .addImm(AMDGPU::sub0)
4662  .addReg(DestSub1)
4663  .addImm(AMDGPU::sub1);
4664 
4665  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
4666 
4667  Worklist.insert(&LoHalf);
4668  Worklist.insert(&HiHalf);
4669 
4670  // We don't need to legalizeOperands here because for a single operand, src0
4671  // will support any kind of input.
4672 
4673  // Move all users of this moved value.
4674  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
4675 }
4676 
4677 void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
4678  MachineInstr &Inst,
4679  MachineDominatorTree *MDT) const {
4680  bool IsAdd = (Inst.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
4681 
4682  MachineBasicBlock &MBB = *Inst.getParent();
4683  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4684 
4685  unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
4686  unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4687  unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4688 
4689  unsigned CarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
4690  unsigned DeadCarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
4691 
4692  MachineOperand &Dest = Inst.getOperand(0);
4693  MachineOperand &Src0 = Inst.getOperand(1);
4694  MachineOperand &Src1 = Inst.getOperand(2);
4695  const DebugLoc &DL = Inst.getDebugLoc();
4696  MachineBasicBlock::iterator MII = Inst;
4697 
4698  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
4699  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
4700  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
4701  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
4702 
4703  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4704  AMDGPU::sub0, Src0SubRC);
4705  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
4706  AMDGPU::sub0, Src1SubRC);
4707 
4708 
4709  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4710  AMDGPU::sub1, Src0SubRC);
4711  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
4712  AMDGPU::sub1, Src1SubRC);
4713 
4714  unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
4715  MachineInstr *LoHalf =
4716  BuildMI(MBB, MII, DL, get(LoOpc), DestSub0)
4717  .addReg(CarryReg, RegState::Define)
4718  .add(SrcReg0Sub0)
4719  .add(SrcReg1Sub0);
4720 
4721  unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
4722  MachineInstr *HiHalf =
4723  BuildMI(MBB, MII, DL, get(HiOpc), DestSub1)
4724  .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
4725  .add(SrcReg0Sub1)
4726  .add(SrcReg1Sub1)
4727  .addReg(CarryReg, RegState::Kill);
4728 
4729  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
4730  .addReg(DestSub0)
4731  .addImm(AMDGPU::sub0)
4732  .addReg(DestSub1)
4733  .addImm(AMDGPU::sub1);
4734 
4735  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
4736 
4737  // Try to legalize the operands in case we need to swap the order to keep it
4738  // valid.
4739  legalizeOperands(*LoHalf, MDT);
4740  legalizeOperands(*HiHalf, MDT);
4741 
4742  // Move all users of this moved value.
4743  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
4744 }
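// [Editor's note: illustrative sketch, not part of the original source.]
// The split above is the usual 64-bit carry chain, roughly
// \code
//   lo, carry = V_ADD_I32_e64  src0.sub0, src1.sub0
//   hi, dead  = V_ADDC_U32_e64 src0.sub1, src1.sub1, carry
//   dst       = REG_SEQUENCE lo, sub0, hi, sub1
// \endcode
// with V_SUB_I32_e64 / V_SUBB_U32_e64 used instead for the subtract pseudo.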
4745 
4746 void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist,
4747  MachineInstr &Inst, unsigned Opcode,
4748  MachineDominatorTree *MDT) const {
4749  MachineBasicBlock &MBB = *Inst.getParent();
4750  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4751 
4752  MachineOperand &Dest = Inst.getOperand(0);
4753  MachineOperand &Src0 = Inst.getOperand(1);
4754  MachineOperand &Src1 = Inst.getOperand(2);
4755  DebugLoc DL = Inst.getDebugLoc();
4756 
4757  MachineBasicBlock::iterator MII = Inst;
4758 
4759  const MCInstrDesc &InstDesc = get(Opcode);
4760  const TargetRegisterClass *Src0RC = Src0.isReg() ?
4761  MRI.getRegClass(Src0.getReg()) :
4762  &AMDGPU::SGPR_32RegClass;
4763 
4764  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
4765  const TargetRegisterClass *Src1RC = Src1.isReg() ?
4766  MRI.getRegClass(Src1.getReg()) :
4767  &AMDGPU::SGPR_32RegClass;
4768 
4769  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
4770 
4771  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4772  AMDGPU::sub0, Src0SubRC);
4773  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
4774  AMDGPU::sub0, Src1SubRC);
4775  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4776  AMDGPU::sub1, Src0SubRC);
4777  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
4778  AMDGPU::sub1, Src1SubRC);
4779 
4780  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
4781  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
4782  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
4783 
4784  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
4785  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
4786  .add(SrcReg0Sub0)
4787  .add(SrcReg1Sub0);
4788 
4789  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
4790  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
4791  .add(SrcReg0Sub1)
4792  .add(SrcReg1Sub1);
4793 
4794  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
4795  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
4796  .addReg(DestSub0)
4797  .addImm(AMDGPU::sub0)
4798  .addReg(DestSub1)
4799  .addImm(AMDGPU::sub1);
4800 
4801  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
4802 
4803  Worklist.insert(&LoHalf);
4804  Worklist.insert(&HiHalf);
4805 
4806  // Move all users of this moved value.
4807  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
4808 }
4809 
4810 void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist,
4811  MachineInstr &Inst,
4812  MachineDominatorTree *MDT) const {
4813  MachineBasicBlock &MBB = *Inst.getParent();
4814  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4815 
4816  MachineOperand &Dest = Inst.getOperand(0);
4817  MachineOperand &Src0 = Inst.getOperand(1);
4818  MachineOperand &Src1 = Inst.getOperand(2);
4819  const DebugLoc &DL = Inst.getDebugLoc();
4820 
4821  MachineBasicBlock::iterator MII = Inst;
4822 
4823  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
4824 
4825  unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4826 
4827  MachineOperand* Op0;
4828  MachineOperand* Op1;
4829 
4830  if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) {
4831  Op0 = &Src0;
4832  Op1 = &Src1;
4833  } else {
4834  Op0 = &Src1;
4835  Op1 = &Src0;
4836  }
4837 
4838  BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm)
4839  .add(*Op0);
4840 
4841  unsigned NewDest = MRI.createVirtualRegister(DestRC);
4842 
4843  MachineInstr &Xor = *BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B64), NewDest)
4844  .addReg(Interm)
4845  .add(*Op1);
4846 
4847  MRI.replaceRegWith(Dest.getReg(), NewDest);
4848 
4849  Worklist.insert(&Xor);
4850 }
4851 
4852 void SIInstrInfo::splitScalar64BitBCNT(
4853  SetVectorType &Worklist, MachineInstr &Inst) const {
4854  MachineBasicBlock &MBB = *Inst.getParent();
4855  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4856 
4857  MachineBasicBlock::iterator MII = Inst;
4858  const DebugLoc &DL = Inst.getDebugLoc();
4859 
4860  MachineOperand &Dest = Inst.getOperand(0);
4861  MachineOperand &Src = Inst.getOperand(1);
4862 
4863  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
4864  const TargetRegisterClass *SrcRC = Src.isReg() ?
4865  MRI.getRegClass(Src.getReg()) :
4866  &AMDGPU::SGPR_32RegClass;
4867 
4868  unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4869  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4870 
4871  const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
4872 
4873  MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
4874  AMDGPU::sub0, SrcSubRC);
4875  MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
4876  AMDGPU::sub1, SrcSubRC);
4877 
4878  BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0);
4879 
4880  BuildMI(MBB, MII, DL, InstDesc, ResultReg).add(SrcRegSub1).addReg(MidReg);
4881 
4882  MRI.replaceRegWith(Dest.getReg(), ResultReg);
4883 
4884  // We don't need to legalize operands here. src0 for either instruction can be
4885  // an SGPR, and the second input is unused or determined here.
4886  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4887 }
4888 
4889 void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
4890  MachineInstr &Inst) const {
4891  MachineBasicBlock &MBB = *Inst.getParent();
4892  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4893  MachineBasicBlock::iterator MII = Inst;
4894  const DebugLoc &DL = Inst.getDebugLoc();
4895 
4896  MachineOperand &Dest = Inst.getOperand(0);
4897  uint32_t Imm = Inst.getOperand(2).getImm();
4898  uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
4899  uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
4900 
4901  (void) Offset;
4902 
4903  // Only sext_inreg cases handled.
4904  assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 &&
4905  Offset == 0 && "Not implemented");
4906 
4907  if (BitWidth < 32) {
4908  unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4909  unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4910  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
4911 
4912  BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
4913  .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0)
4914  .addImm(0)
4915  .addImm(BitWidth);
4916 
4917  BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
4918  .addImm(31)
4919  .addReg(MidRegLo);
4920 
4921  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
4922  .addReg(MidRegLo)
4923  .addImm(AMDGPU::sub0)
4924  .addReg(MidRegHi)
4925  .addImm(AMDGPU::sub1);
4926 
4927  MRI.replaceRegWith(Dest.getReg(), ResultReg);
4928  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4929  return;
4930  }
4931 
4932  MachineOperand &Src = Inst.getOperand(1);
4933  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4934  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
4935 
4936  BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
4937  .addImm(31)
4938  .addReg(Src.getReg(), 0, AMDGPU::sub0);
4939 
4940  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
4941  .addReg(Src.getReg(), 0, AMDGPU::sub0)
4942  .addImm(AMDGPU::sub0)
4943  .addReg(TmpReg)
4944  .addImm(AMDGPU::sub1);
4945 
4946  MRI.replaceRegWith(Dest.getReg(), ResultReg);
4947  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4948 }
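// [Editor's note: worked example, not part of the original source.]
// The S_BFE_* immediate packs the bit offset in bits [5:0] and the field
// width in bits [22:16]. A hypothetical Imm of 0x100000 therefore decodes to
// Offset = 0 and BitWidth = 16, i.e. a sign extension of the low 16 bits,
// which is handled by the BitWidth < 32 path above.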
4949 
4950 void SIInstrInfo::addUsersToMoveToVALUWorklist(
4951  unsigned DstReg,
4952  MachineRegisterInfo &MRI,
4953  SetVectorType &Worklist) const {
4954  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
4955  E = MRI.use_end(); I != E;) {
4956  MachineInstr &UseMI = *I->getParent();
4957  if (!canReadVGPR(UseMI, I.getOperandNo())) {
4958  Worklist.insert(&UseMI);
4959 
4960  do {
4961  ++I;
4962  } while (I != E && I->getParent() == &UseMI);
4963  } else {
4964  ++I;
4965  }
4966  }
4967 }
4968 
4969 void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
4970  MachineRegisterInfo &MRI,
4971  MachineInstr &Inst) const {
4972  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4973  MachineBasicBlock *MBB = Inst.getParent();
4974  MachineOperand &Src0 = Inst.getOperand(1);
4975  MachineOperand &Src1 = Inst.getOperand(2);
4976  const DebugLoc &DL = Inst.getDebugLoc();
4977 
4978  switch (Inst.getOpcode()) {
4979  case AMDGPU::S_PACK_LL_B32_B16: {
4980  unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4981  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4982 
4983  // FIXME: Can do a lot better if we know the high bits of src0 or src1 are
4984  // 0.
4985  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
4986  .addImm(0xffff);
4987 
4988  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg)
4989  .addReg(ImmReg, RegState::Kill)
4990  .add(Src0);
4991 
4992  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg)
4993  .add(Src1)
4994  .addImm(16)
4995  .addReg(TmpReg, RegState::Kill);
4996  break;
4997  }
4998  case AMDGPU::S_PACK_LH_B32_B16: {
4999  unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5000  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
5001  .addImm(0xffff);
5002  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg)
5003  .addReg(ImmReg, RegState::Kill)
5004  .add(Src0)
5005  .add(Src1);
5006  break;
5007  }
5008  case AMDGPU::S_PACK_HH_B32_B16: {
5009  unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5010  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5011  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
5012  .addImm(16)
5013  .add(Src0);
5014  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
5015  .addImm(0xffff0000);
5016  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg)
5017  .add(Src1)
5018  .addReg(ImmReg, RegState::Kill)
5019  .addReg(TmpReg, RegState::Kill);
5020  break;
5021  }
5022  default:
5023  llvm_unreachable("unhandled s_pack_* instruction");
5024  }
5025 
5026  MachineOperand &Dest = Inst.getOperand(0);
5027  MRI.replaceRegWith(Dest.getReg(), ResultReg);
5028  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
5029 }
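// [Editor's note: reference semantics, not part of the original source.]
// The scalar behaviour reproduced by the VALU expansions above, written as
// plain C++ for reference (a and b stand for src0 and src1):
// \code
//   uint32_t pack_ll(uint32_t a, uint32_t b) { return (b << 16) | (a & 0xffff); }
//   uint32_t pack_lh(uint32_t a, uint32_t b) { return (b & 0xffff0000) | (a & 0xffff); }
//   uint32_t pack_hh(uint32_t a, uint32_t b) { return (b & 0xffff0000) | (a >> 16); }
// \endcode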
5030 
5031 void SIInstrInfo::addSCCDefUsersToVALUWorklist(
5032  MachineInstr &SCCDefInst, SetVectorType &Worklist) const {
5033  // This assumes that all the users of SCC are in the same block
5034  // as the SCC def.
5035  for (MachineInstr &MI :
5036  make_range(MachineBasicBlock::iterator(SCCDefInst),
5037  SCCDefInst.getParent()->end())) {
5038  // Exit if we find another SCC def.
5039  if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1)
5040  return;
5041 
5042  if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1)
5043  Worklist.insert(&MI);
5044  }
5045 }
5046 
5047 const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
5048  const MachineInstr &Inst) const {
5049  const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
5050 
5051  switch (Inst.getOpcode()) {
5052  // For target instructions, getOpRegClass just returns the virtual register
5053  // class associated with the operand, so we need to find an equivalent VGPR
5054  // register class in order to move the instruction to the VALU.
5055  case AMDGPU::COPY:
5056  case AMDGPU::PHI:
5057  case AMDGPU::REG_SEQUENCE:
5058  case AMDGPU::INSERT_SUBREG:
5059  case AMDGPU::WQM:
5060  case AMDGPU::WWM:
5061  if (RI.hasVGPRs(NewDstRC))
5062  return nullptr;
5063 
5064  NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
5065  if (!NewDstRC)
5066  return nullptr;
5067  return NewDstRC;
5068  default:
5069  return NewDstRC;
5070  }
5071 }
5072 
5073 // Find the one SGPR operand we are allowed to use.
5074 unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI,
5075  int OpIndices[3]) const {
5076  const MCInstrDesc &Desc = MI.getDesc();
5077 
5078  // Find the one SGPR operand we are allowed to use.
5079  //
5080  // First we need to consider the instruction's operand requirements before
5081  // legalizing. Some operands are required to be SGPRs, such as implicit uses
5082  // of VCC, but we are still bound by the constant bus requirement to only use
5083  // one.
5084  //
5085  // If the operand's class is an SGPR, we can never move it.
5086 
5087  unsigned SGPRReg = findImplicitSGPRRead(MI);
5088  if (SGPRReg != AMDGPU::NoRegister)
5089  return SGPRReg;
5090 
5091  unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
5092  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5093 
5094  for (unsigned i = 0; i < 3; ++i) {
5095  int Idx = OpIndices[i];
5096  if (Idx == -1)
5097  break;
5098 
5099  const MachineOperand &MO = MI.getOperand(Idx);
5100  if (!MO.isReg())
5101  continue;
5102 
5103  // Is this operand statically required to be an SGPR based on the operand
5104  // constraints?
5105  const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass);
5106  bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
5107  if (IsRequiredSGPR)
5108  return MO.getReg();
5109 
5110  // If this could be a VGPR or an SGPR, check the dynamic register class.
5111  unsigned Reg = MO.getReg();
5112  const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
5113  if (RI.isSGPRClass(RegRC))
5114  UsedSGPRs[i] = Reg;
5115  }
5116 
5117  // We don't have a required SGPR operand, so we have a bit more freedom in
5118  // selecting operands to move.
5119 
5120  // Try to select the most used SGPR. If an SGPR is equal to one of the
5121  // others, we choose that.
5122  //
5123  // e.g.
5124  // V_FMA_F32 v0, s0, s0, s0 -> No moves
5125  // V_FMA_F32 v0, s0, s1, s0 -> Move s1
5126 
5127  // TODO: If some of the operands are 64-bit SGPRs and some 32, we should
5128  // prefer those.
5129 
5130  if (UsedSGPRs[0] != AMDGPU::NoRegister) {
5131  if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
5132  SGPRReg = UsedSGPRs[0];
5133  }
5134 
5135  if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
5136  if (UsedSGPRs[1] == UsedSGPRs[2])
5137  SGPRReg = UsedSGPRs[1];
5138  }
5139 
5140  return SGPRReg;
5141 }
5142 
5143 MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
5144  unsigned OperandName) const {
5145  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
5146  if (Idx == -1)
5147  return nullptr;
5148 
5149  return &MI.getOperand(Idx);
5150 }
5151 
5152 uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
5153  uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
5154  if (ST.isAmdHsaOS()) {
5155  // Set ATC = 1. GFX9 doesn't have this bit.
5156  if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS)
5157  RsrcDataFormat |= (1ULL << 56);
5158 
5159  // Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this.
5160  // BTW, it disables TC L2 and therefore decreases performance.
5161  if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS)
5162  RsrcDataFormat |= (2ULL << 59);
5163  }
5164 
5165  return RsrcDataFormat;
5166 }
5167 
5168 uint64_t SIInstrInfo::getScratchRsrcWords23() const {
5169  uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
5170  AMDGPU::RSRC_TID_ENABLE |
5171  0xffffffff; // Size;
5172 
5173  // GFX9 doesn't have ELEMENT_SIZE.
5174  if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
5175  uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
5176  Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT;
5177  }
5178 
5179  // IndexStride = 64.
5180  Rsrc23 |= UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT;
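// (Editorial note: the INDEX_STRIDE field encodes strides of 8/16/32/64 as
// 0/1/2/3, so writing 3 selects a stride of 64, matching the wavefront size.)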
5181 
5182  // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
5183  // Clear them unless we want a huge stride.
5184  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
5185  Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
5186 
5187  return Rsrc23;
5188 }
5189 
5190 bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const {
5191  unsigned Opc = MI.getOpcode();
5192 
5193  return isSMRD(Opc);
5194 }
5195 
5196 bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr &MI) const {
5197  unsigned Opc = MI.getOpcode();
5198 
5199  return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
5200 }
5201 
5202 unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
5203  int &FrameIndex) const {
5204  const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
5205  if (!Addr || !Addr->isFI())
5206  return AMDGPU::NoRegister;
5207 
5208  assert(!MI.memoperands_empty() &&
5209  "Frame index access should have a memoperand.");
5210 
5211  FrameIndex = Addr->getIndex();
5212  return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
5213 }
5214 
5215 unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI,
5216  int &FrameIndex) const {
5217  const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::addr);
5218  assert(Addr && Addr->isFI());
5219  FrameIndex = Addr->getIndex();
5220  return getNamedOperand(MI, AMDGPU::OpName::data)->getReg();
5221 }
5222 
5223 unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
5224  int &FrameIndex) const {
5225  if (!MI.mayLoad())
5226  return AMDGPU::NoRegister;
5227 
5228  if (isMUBUF(MI) || isVGPRSpill(MI))
5229  return isStackAccess(MI, FrameIndex);
5230 
5231  if (isSGPRSpill(MI))
5232  return isSGPRStackAccess(MI, FrameIndex);
5233 
5234  return AMDGPU::NoRegister;
5235 }
5236 
5237 unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
5238  int &FrameIndex) const {
5239  if (!MI.mayStore())
5240  return AMDGPU::NoRegister;
5241 
5242  if (isMUBUF(MI) || isVGPRSpill(MI))
5243  return isStackAccess(MI, FrameIndex);
5244 
5245  if (isSGPRSpill(MI))
5246  return isSGPRStackAccess(MI, FrameIndex);
5247 
5248  return AMDGPU::NoRegister;
5249 }
5250 
5251 unsigned SIInstrInfo::getInstBundleSize(const MachineInstr &MI) const {
5252  unsigned Size = 0;
5253  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
5254  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
5255  while (++I != E && I->isInsideBundle()) {
5256  assert(!I->isBundle() && "No nested bundle!");
5257  Size += getInstSizeInBytes(*I);
5258  }
5259 
5260  return Size;
5261 }
5262 
5263 unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
5264  unsigned Opc = MI.getOpcode();
5265  const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc);
5266  unsigned DescSize = Desc.getSize();
5267 
5268  // If we have a definitive size, we can use it. Otherwise we need to inspect
5269  // the operands to know the size.
5270  if (isFixedSize(MI))
5271  return DescSize;
5272 
5273  // 4-byte instructions may have a 32-bit literal encoded after them. Check
5274  // operands that could ever be literals.
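// (Editorial example: a VOP2 such as V_ADD_F32_e32 whose src0 is an immediate
// outside the inline-constant range is a 4-byte encoding followed by a 32-bit
// literal dword, so it is reported as 8 bytes here.)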
5275  if (isVALU(MI) || isSALU(MI)) {
5276  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
5277  if (Src0Idx == -1)
5278  return DescSize; // No operands.
5279 
5280  if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
5281  return DescSize + 4;
5282 
5283  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5284  if (Src1Idx == -1)
5285  return DescSize;
5286 
5287  if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
5288  return DescSize + 4;
5289 
5290  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
5291  if (Src2Idx == -1)
5292  return DescSize;
5293 
5294  if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx]))
5295  return DescSize + 4;
5296 
5297  return DescSize;
5298  }
5299 
5300  switch (Opc) {
5301  case TargetOpcode::IMPLICIT_DEF:
5302  case TargetOpcode::KILL:
5303  case TargetOpcode::DBG_VALUE:
5304  case TargetOpcode::EH_LABEL:
5305  return 0;
5306  case TargetOpcode::BUNDLE:
5307  return getInstBundleSize(MI);
5308  case TargetOpcode::INLINEASM: {
5309  const MachineFunction *MF = MI.getParent()->getParent();
5310  const char *AsmStr = MI.getOperand(0).getSymbolName();
5311  return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
5312  }
5313  default:
5314  return DescSize;
5315  }
5316 }
5317 
5318 bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
5319  if (!isFLAT(MI))
5320  return false;
5321 
5322  if (MI.memoperands_empty())
5323  return true;
5324 
5325  for (const MachineMemOperand *MMO : MI.memoperands()) {
5326  if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
5327  return true;
5328  }
5329  return false;
5330 }
5331 
5332 bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const {
5333  return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
5334 }
5335 
5336 void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
5337  MachineBasicBlock *IfEnd) const {
5338  MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator();
5339  assert(TI != IfEntry->end());
5340 
5341  MachineInstr *Branch = &(*TI);
5342  MachineFunction *MF = IfEntry->getParent();
5343  MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo();
5344 
5345  if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
5346  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
5347  MachineInstr *SIIF =
5348  BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg)
5349  .add(Branch->getOperand(0))
5350  .add(Branch->getOperand(1));
5351  MachineInstr *SIEND =
5352  BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF))
5353  .addReg(DstReg);
5354 
5355  IfEntry->erase(TI);
5356  IfEntry->insert(IfEntry->end(), SIIF);
5357  IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND);
5358  }
5359 }
5360 
5361 void SIInstrInfo::convertNonUniformLoopRegion(
5362  MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const {
5363  MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator();
5364  // We expect 2 terminators, one conditional and one unconditional.
5365  assert(TI != LoopEnd->end());
5366 
5367  MachineInstr *Branch = &(*TI);
5368  MachineFunction *MF = LoopEnd->getParent();
5369  MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo();
5370 
5371  if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
5372 
5373  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
5374  unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
5375  MachineInstrBuilder HeaderPHIBuilder =
5376  BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg);
5377  for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(),
5378  E = LoopEntry->pred_end();
5379  PI != E; ++PI) {
5380  if (*PI == LoopEnd) {
5381  HeaderPHIBuilder.addReg(BackEdgeReg);
5382  } else {
5383  MachineBasicBlock *PMBB = *PI;
5384  unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
5385  materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(),
5386  ZeroReg, 0);
5387  HeaderPHIBuilder.addReg(ZeroReg);
5388  }
5389  HeaderPHIBuilder.addMBB(*PI);
5390  }
5391  MachineInstr *HeaderPhi = HeaderPHIBuilder;
5392  MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(),
5393  get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
5394  .addReg(DstReg)
5395  .add(Branch->getOperand(0));
5396  MachineInstr *SILOOP =
5397  BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP))
5398  .addReg(BackEdgeReg)
5399  .addMBB(LoopEntry);
5400 
5401  LoopEntry->insert(LoopEntry->begin(), HeaderPhi);
5402  LoopEnd->erase(TI);
5403  LoopEnd->insert(LoopEnd->end(), SIIFBREAK);
5404  LoopEnd->insert(LoopEnd->end(), SILOOP);
5405  }
5406 }
5407 
5408 ArrayRef<std::pair<int, const char *>>
5409 SIInstrInfo::getSerializableTargetIndices() const {
5410  static const std::pair<int, const char *> TargetIndices[] = {
5411  {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
5412  {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
5413  {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
5414  {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
5415  {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
5416  return makeArrayRef(TargetIndices);
5417 }
5418 
5419 /// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The
5420 /// post-RA version of misched uses CreateTargetMIHazardRecognizer.
5421 ScheduleHazardRecognizer *
5422 SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
5423  const ScheduleDAG *DAG) const {
5424  return new GCNHazardRecognizer(DAG->MF);
5425 }
5426 
5427 /// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer
5428 /// pass.
5429 ScheduleHazardRecognizer *
5430 SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
5431  return new GCNHazardRecognizer(MF);
5432 }
5433 
5434 std::pair<unsigned, unsigned>
5435 SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
5436  return std::make_pair(TF & MO_MASK, TF & ~MO_MASK);
5437 }
5438 
5439 ArrayRef<std::pair<unsigned, const char *>>
5440 SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5441  static const std::pair<unsigned, const char *> TargetFlags[] = {
5442  { MO_GOTPCREL, "amdgpu-gotprel" },
5443  { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" },
5444  { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" },
5445  { MO_REL32_LO, "amdgpu-rel32-lo" },
5446  { MO_REL32_HI, "amdgpu-rel32-hi" }
5447  };
5448 
5449  return makeArrayRef(TargetFlags);
5450 }
5451 
5452 bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
5453  return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
5454  MI.modifiesRegister(AMDGPU::EXEC, &RI);
5455 }
5456 
5457 MachineInstrBuilder
5458 SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
5459  MachineBasicBlock::iterator I,
5460  const DebugLoc &DL,
5461  unsigned DestReg) const {
5462  if (ST.hasAddNoCarry())
5463  return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);
5464 
5465  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
5466  unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
5467  MRI.setRegAllocationHint(UnusedCarry, 0, AMDGPU::VCC);
5468 
5469  return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
5470  .addReg(UnusedCarry, RegState::Define | RegState::Dead);
5471 }
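// (Editorial usage sketch, not code from this file: callers append the two
// source operands to the returned builder, e.g.
//   TII->getAddNoCarry(MBB, I, DL, DestReg)
//       .addReg(BaseReg)
//       .addImm(Offset);
// BaseReg and Offset are placeholder names for this illustration.)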
5472 
5473 bool SIInstrInfo::isKillTerminator(unsigned Opcode) {
5474  switch (Opcode) {
5475  case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
5476  case AMDGPU::SI_KILL_I1_TERMINATOR:
5477  return true;
5478  default:
5479  return false;
5480  }
5481 }
5482 
5483 const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) const {
5484  switch (Opcode) {
5485  case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
5486  return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
5487  case AMDGPU::SI_KILL_I1_PSEUDO:
5488  return get(AMDGPU::SI_KILL_I1_TERMINATOR);
5489  default:
5490  llvm_unreachable("invalid opcode, expected SI_KILL_*_PSEUDO");
5491  }
5492 }
5493 
5494 bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
5495  if (!isSMRD(MI))
5496  return false;
5497 
5498  // Check that it is using a buffer resource.
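// (Editorial note: e.g. S_BUFFER_LOAD_* takes a 128-bit SGPR resource
// descriptor in sbase, whereas a plain S_LOAD_* takes a 64-bit base pointer;
// the SReg_128 register-class check below distinguishes the two.)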
5499  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
5500  if (Idx == -1) // e.g. s_memtime
5501  return false;
5502 
5503  const auto RCID = MI.getDesc().OpInfo[Idx].RegClass;
5504  return RCID == AMDGPU::SReg_128RegClassID;
5505 }
5506 
5507 // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
5508 enum SIEncodingFamily {
5509  SI = 0,
5510  VI = 1,
5511  SDWA = 2,
5512  SDWA9 = 3,
5513  GFX80 = 4,
5514  GFX9 = 5
5515 };
5516 
5517 static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
5518  switch (ST.getGeneration()) {
5519  default:
5520  break;
5521  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
5522  case AMDGPUSubtarget::SEA_ISLANDS:
5523  return SIEncodingFamily::SI;
5524  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
5525  case AMDGPUSubtarget::GFX9:
5526  return SIEncodingFamily::VI;
5527  }
5528  llvm_unreachable("Unknown subtarget generation!");
5529 }
5530 
5531 int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
5532  SIEncodingFamily Gen = subtargetEncodingFamily(ST);
5533 
5534  if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
5535  ST.getGeneration() >= AMDGPUSubtarget::GFX9)
5536  Gen = SIEncodingFamily::GFX9;
5537 
5538  if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
5539  Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
5540  : SIEncodingFamily::SDWA;
5541  // Adjust the encoding family to GFX80 for D16 buffer instructions when the
5542  // subtarget has UnpackedD16VMem feature.
5543  // TODO: remove this when we discard GFX80 encoding.
5544  if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
5545  Gen = SIEncodingFamily::GFX80;
5546 
5547  int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
5548 
5549  // -1 means that Opcode is already a native instruction.
5550  if (MCOp == -1)
5551  return Opcode;
5552 
5553  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
5554  // no encoding in the given subtarget generation.
5555  if (MCOp == (uint16_t)-1)
5556  return -1;
5557 
5558  return MCOp;
5559 }
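// (Editorial usage sketch, not code from this file: a typical caller during
// MC lowering might do
//   int MCOp = TII->pseudoToMCOpcode(MI.getOpcode());
//   if (MCOp == -1)
//     /* no encoding on this subtarget: diagnose */;
//   else
//     OutMI.setOpcode(MCOp);
// OutMI is a placeholder name for this illustration.)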
5560 
5561 static
5562 TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd) {
5563  assert(RegOpnd.isReg());
5564  return RegOpnd.isUndef() ? TargetInstrInfo::RegSubRegPair() :
5565  getRegSubRegPair(RegOpnd);
5566 }
5567 
5568 TargetInstrInfo::RegSubRegPair llvm::getRegSequenceSubReg(MachineInstr &MI,
5569  unsigned SubReg) {
5570  assert(MI.isRegSequence());
5571  for (unsigned I = 0, E = (MI.getNumOperands() - 1)/ 2; I < E; ++I)
5572  if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
5573  auto &RegOp = MI.getOperand(1 + 2 * I);
5574  return getRegOrUndef(RegOp);
5575  }
5576  return TargetInstrInfo::RegSubRegPair();
5577 }
5578 
5579 // Try to find the definition of reg:subreg in subreg-manipulation pseudos
5580 // Following a subreg of reg:subreg isn't supported
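// (Editorial MIR example, not from this file: given
//   %5:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %2, %subreg.sub1
// a query for %5 with subregister sub1 is redirected to the definition of %2.)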
5581 static bool followSubRegDef(MachineInstr &MI,
5582  TargetInstrInfo::RegSubRegPair &RSR) {
5583  if (!RSR.SubReg)
5584  return false;
5585  switch (MI.getOpcode()) {
5586  default: break;
5587  case AMDGPU::REG_SEQUENCE:
5588  RSR = getRegSequenceSubReg(MI, RSR.SubReg);
5589  return true;
5590  // EXTRACT_SUBREG isn't supported as this would follow a subreg of subreg
5591  case AMDGPU::INSERT_SUBREG:
5592  if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
5593  // inserted the subreg we're looking for
5594  RSR = getRegOrUndef(MI.getOperand(2));
5595  else { // the subreg in the rest of the reg
5596  auto R1 = getRegOrUndef(MI.getOperand(1));
5597  if (R1.SubReg) // subreg of subreg isn't supported
5598  return false;
5599  RSR.Reg = R1.Reg;
5600  }
5601  return true;
5602  }
5603  return false;
5604 }
5605 
5606 MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
5607  MachineRegisterInfo &MRI) {
5608  assert(MRI.isSSA());
5609  if (!TargetRegisterInfo::isVirtualRegister(P.Reg))
5610  return nullptr;
5611 
5612  auto RSR = P;
5613  auto *DefInst = MRI.getVRegDef(RSR.Reg);
5614  while (auto *MI = DefInst) {
5615  DefInst = nullptr;
5616  switch (MI->getOpcode()) {
5617  case AMDGPU::COPY:
5618  case AMDGPU::V_MOV_B32_e32: {
5619  auto &Op1 = MI->getOperand(1);
5620  if (Op1.isReg() &&
5621  TargetRegisterInfo::isVirtualRegister(Op1.getReg())) {
5622  if (Op1.isUndef())
5623  return nullptr;
5624  RSR = getRegSubRegPair(Op1);
5625  DefInst = MRI.getVRegDef(RSR.Reg);
5626  }
5627  break;
5628  }
5629  default:
5630  if (followSubRegDef(*MI, RSR)) {
5631  if (!RSR.Reg)
5632  return nullptr;
5633  DefInst = MRI.getVRegDef(RSR.Reg);
5634  }
5635  }
5636  if (!DefInst)
5637  return MI;
5638  }
5639  return nullptr;
5640 }