LLVM 8.0.1
SIShrinkInstructions.cpp
//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// The pass tries to use the 32-bit encoding for instructions when possible.
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-shrink-instructions"

STATISTIC(NumInstructionsShrunk,
          "Number of 64-bit instructions reduced to 32-bit.");
STATISTIC(NumLiteralConstantsFolded,
          "Number of literal constants folded into 32-bit instructions.");

using namespace llvm;

namespace {

class SIShrinkInstructions : public MachineFunctionPass {
public:
  static char ID;

public:
  SIShrinkInstructions() : MachineFunctionPass(ID) {
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Shrink Instructions"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIShrinkInstructions, DEBUG_TYPE,
                "SI Shrink Instructions", false, false)

char SIShrinkInstructions::ID = 0;

FunctionPass *llvm::createSIShrinkInstructionsPass() {
  return new SIShrinkInstructions();
}

/// This function checks \p MI for operands defined by a move immediate
/// instruction and then folds the literal constant into the instruction if it
/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
                           MachineRegisterInfo &MRI, bool TryToCommute = true) {
  assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));

  int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);

  // Try to fold Src0
  MachineOperand &Src0 = MI.getOperand(Src0Idx);
  if (Src0.isReg()) {
    unsigned Reg = Src0.getReg();
    if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) {
      MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
      if (Def && Def->isMoveImmediate()) {
        MachineOperand &MovSrc = Def->getOperand(1);
        bool ConstantFolded = false;

        if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
                               isUInt<32>(MovSrc.getImm()))) {
          // It's possible to have only one component of a super-reg defined by
          // a single mov, so we need to clear any subregister flag.
          Src0.setSubReg(0);
          Src0.ChangeToImmediate(MovSrc.getImm());
          ConstantFolded = true;
        } else if (MovSrc.isFI()) {
          Src0.setSubReg(0);
          Src0.ChangeToFrameIndex(MovSrc.getIndex());
          ConstantFolded = true;
        }

        if (ConstantFolded) {
          assert(MRI.use_empty(Reg));
          Def->eraseFromParent();
          ++NumLiteralConstantsFolded;
          return true;
        }
      }
    }
  }

  // We have failed to fold src0, so commute the instruction and try again.
  if (TryToCommute && MI.isCommutable()) {
    if (TII->commuteInstruction(MI)) {
      if (foldImmediates(MI, TII, MRI, false))
        return true;

      // Commute back.
      TII->commuteInstruction(MI);
    }
  }

  return false;
}
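
// Illustrative sketch of the fold above (simplified MIR): when the single-use
// source of src0 is a move-immediate,
//   %1:vgpr_32 = V_MOV_B32_e32 0x12345678   ; only use of %1 is below
//   %2:vgpr_32 = V_ADD_F32_e32 %1, %0
// the literal is folded into src0 and the now-dead mov is erased:
//   %2:vgpr_32 = V_ADD_F32_e32 0x12345678, %0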

static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
  return isInt<16>(Src.getImm()) &&
         !TII->isInlineConstant(*Src.getParent(),
                                Src.getParent()->getOperandNo(&Src));
}

static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
  return isUInt<16>(Src.getImm()) &&
         !TII->isInlineConstant(*Src.getParent(),
                                Src.getParent()->getOperandNo(&Src));
}
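
// Illustrative values: 64 is an inline constant (inline integers span
// -16..64), so it is rejected here; 65 fits the signed 16-bit field but would
// need a 4-byte literal, so isKImmOperand returns true and a *K form pays off.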

static bool isKImmOrKUImmOperand(const SIInstrInfo *TII,
                                 const MachineOperand &Src,
                                 bool &IsUnsigned) {
  if (isInt<16>(Src.getImm())) {
    IsUnsigned = false;
    return !TII->isInlineConstant(Src);
  }

  if (isUInt<16>(Src.getImm())) {
    IsUnsigned = true;
    return !TII->isInlineConstant(Src);
  }

  return false;
}

/// \returns true if the constant in \p Src should be replaced with a bitreverse
/// of an inline immediate.
static bool isReverseInlineImm(const SIInstrInfo *TII,
                               const MachineOperand &Src,
                               int32_t &ReverseImm) {
  if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src))
    return false;

  ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Src.getImm()));
  return ReverseImm >= -16 && ReverseImm <= 64;
}
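
// Illustrative example: 0x80000000 (a bare sign bit) is not an inline
// constant, but its bit reversal is 1, which is. So
//   v_mov_b32 v0, 0x80000000    ; a 4-byte literal follows the encoding
// can instead be emitted as
//   v_bfrev_b32 v0, 1           ; inline immediate, 4 bytes shorter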

/// Copy implicit register operands from specified instruction to this
/// instruction that are not part of the instruction definition.
static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
                                 const MachineInstr &MI) {
  for (unsigned i = MI.getDesc().getNumOperands() +
                    MI.getDesc().getNumImplicitUses() +
                    MI.getDesc().getNumImplicitDefs(), e = MI.getNumOperands();
       i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
      NewMI.addOperand(MF, MO);
  }
}

static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
  // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
  // get constants on the RHS.
  if (!MI.getOperand(0).isReg())
    TII->commuteInstruction(MI, false, 0, 1);

  const MachineOperand &Src1 = MI.getOperand(1);
  if (!Src1.isImm())
    return;

  int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
  if (SOPKOpc == -1)
    return;

  // eq/ne is special because the imm16 can be treated as signed or unsigned,
  // and initially selected to the unsigned versions.
  if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
    bool HasUImm;
    if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
      if (!HasUImm) {
        SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
          AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
      }

      MI.setDesc(TII->get(SOPKOpc));
    }

    return;
  }

  const MCInstrDesc &NewDesc = TII->get(SOPKOpc);

  if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(TII, Src1)) ||
      (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(TII, Src1))) {
    MI.setDesc(NewDesc);
  }
}
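
// Illustrative example: the 32-bit compare
//   s_cmp_eq_u32 s0, 0x1234     ; 0x1234 is a non-inline literal
// fits the signed 16-bit field, so it becomes the shorter SOPK form
//   s_cmpk_eq_i32 s0, 0x1234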

/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
/// XNOR (as a ^ b == ~(a ^ ~b)).
/// \returns true if the caller should continue the machine function iterator
static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
                                MachineRegisterInfo &MRI,
                                const SIInstrInfo *TII,
                                MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  const MachineOperand *Dest = &MI.getOperand(0);
  MachineOperand *Src0 = &MI.getOperand(1);
  MachineOperand *Src1 = &MI.getOperand(2);
  MachineOperand *SrcReg = Src0;
  MachineOperand *SrcImm = Src1;

  if (SrcImm->isImm() &&
      !AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST.hasInv2PiInlineImm())) {
    uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
    uint32_t NewImm = 0;

    if (Opc == AMDGPU::S_AND_B32) {
      if (isPowerOf2_32(~Imm)) {
        NewImm = countTrailingOnes(Imm);
        Opc = AMDGPU::S_BITSET0_B32;
      } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
        NewImm = ~Imm;
        Opc = AMDGPU::S_ANDN2_B32;
      }
    } else if (Opc == AMDGPU::S_OR_B32) {
      if (isPowerOf2_32(Imm)) {
        NewImm = countTrailingZeros(Imm);
        Opc = AMDGPU::S_BITSET1_B32;
      } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
        NewImm = ~Imm;
        Opc = AMDGPU::S_ORN2_B32;
      }
    } else if (Opc == AMDGPU::S_XOR_B32) {
      if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
        NewImm = ~Imm;
        Opc = AMDGPU::S_XNOR_B32;
      }
    } else {
      llvm_unreachable("unexpected opcode");
    }

    if ((Opc == AMDGPU::S_ANDN2_B32 || Opc == AMDGPU::S_ORN2_B32) &&
        SrcImm == Src0) {
      if (!TII->commuteInstruction(MI, false, 1, 2))
        NewImm = 0;
    }

    if (NewImm != 0) {
      if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) &&
          SrcReg->isReg()) {
        MRI.setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
        MRI.setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
        return true;
      }

      if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
        MI.setDesc(TII->get(Opc));
        if (Opc == AMDGPU::S_BITSET0_B32 ||
            Opc == AMDGPU::S_BITSET1_B32) {
          Src0->ChangeToImmediate(NewImm);
          MI.RemoveOperand(2);
        } else {
          SrcImm->setImm(NewImm);
        }
      }
    }
  }

  return false;
}
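
// Illustrative rewrites performed above (assuming src and dst were allocated
// to the same register, as the hints request):
//   s_and_b32 s0, s0, 0xffffdfff   ->  s_bitset0_b32 s0, 13
//   s_or_b32  s0, s0, 0x20000000   ->  s_bitset1_b32 s0, 29
//   s_xor_b32 s0, s0, 0xffffffc0   ->  s_xnor_b32 s0, s0, 63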

// This is the same as MachineInstr::readsRegister/modifiesRegister except
// it takes subregs into account.
static bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
                          unsigned Reg, unsigned SubReg,
                          const SIRegisterInfo &TRI) {
  for (const MachineOperand &MO : R) {
    if (!MO.isReg())
      continue;

    if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
        TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
      if (TRI.regsOverlap(Reg, MO.getReg()))
        return true;
    } else if (MO.getReg() == Reg &&
               TargetRegisterInfo::isVirtualRegister(Reg)) {
      LaneBitmask Overlap = TRI.getSubRegIndexLaneMask(SubReg) &
                            TRI.getSubRegIndexLaneMask(MO.getSubReg());
      if (Overlap.any())
        return true;
    }
  }
  return false;
}

static bool instReadsReg(const MachineInstr *MI,
                         unsigned Reg, unsigned SubReg,
                         const SIRegisterInfo &TRI) {
  return instAccessReg(MI->uses(), Reg, SubReg, TRI);
}

static bool instModifiesReg(const MachineInstr *MI,
                            unsigned Reg, unsigned SubReg,
                            const SIRegisterInfo &TRI) {
  return instAccessReg(MI->defs(), Reg, SubReg, TRI);
}

static TargetInstrInfo::RegSubRegPair
getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I,
                  const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) {
  if (TRI.getRegSizeInBits(Reg, MRI) != 32) {
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
    } else {
      LaneBitmask LM = TRI.getSubRegIndexLaneMask(Sub);
      Sub = TRI.getSubRegFromChannel(I + countTrailingZeros(LM.getAsInteger()));
    }
  }
  return TargetInstrInfo::RegSubRegPair(Reg, Sub);
}

// Match:
// mov t, x
// mov x, y
// mov y, t
//
// =>
//
// mov t, x (t is potentially dead and move eliminated)
// v_swap_b32 x, y
//
// Returns next valid instruction pointer if was able to create v_swap_b32.
//
// This shall not be done too early, so as not to prevent possible folding
// which may remove the matched moves; it should preferably be done before RA
// to release the saved register, and possibly also after RA, which can insert
// copies too.
//
// This is really just a generic peephole that is not a canonical shrinking,
// although the requirements match the pass placement and it reduces code size
// too.
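//
// A concrete instance (illustrative): swapping the 64-bit pairs v[0:1] and
// v[2:3] through a temporary matches here too, and the per-channel loop below
// then emits one swap per 32-bit channel:
//   v_swap_b32 v0, v2
//   v_swap_b32 v1, v3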
static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
                               const SIInstrInfo *TII) {
  assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
         MovT.getOpcode() == AMDGPU::COPY);

  unsigned T = MovT.getOperand(0).getReg();
  unsigned Tsub = MovT.getOperand(0).getSubReg();
  MachineOperand &Xop = MovT.getOperand(1);

  if (!Xop.isReg())
    return nullptr;
  unsigned X = Xop.getReg();
  unsigned Xsub = Xop.getSubReg();

  unsigned Size = TII->getOpSize(MovT, 0) / 4;

  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  if (!TRI.isVGPR(MRI, X))
    return nullptr;

  for (MachineOperand &YTop : MRI.use_nodbg_operands(T)) {
    if (YTop.getSubReg() != Tsub)
      continue;

    MachineInstr &MovY = *YTop.getParent();
    if ((MovY.getOpcode() != AMDGPU::V_MOV_B32_e32 &&
         MovY.getOpcode() != AMDGPU::COPY) ||
        MovY.getOperand(1).getSubReg() != Tsub)
      continue;

    unsigned Y = MovY.getOperand(0).getReg();
    unsigned Ysub = MovY.getOperand(0).getSubReg();

    if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent())
      continue;

    MachineInstr *MovX = nullptr;
    auto I = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end();
    for (auto IY = MovY.getIterator(); I != E && I != IY; ++I) {
      if (instReadsReg(&*I, X, Xsub, TRI) ||
          instModifiesReg(&*I, Y, Ysub, TRI) ||
          instModifiesReg(&*I, T, Tsub, TRI) ||
          (MovX && instModifiesReg(&*I, X, Xsub, TRI))) {
        MovX = nullptr;
        break;
      }
      if (!instReadsReg(&*I, Y, Ysub, TRI)) {
        if (!MovX && instModifiesReg(&*I, X, Xsub, TRI)) {
          MovX = nullptr;
          break;
        }
        continue;
      }
      if (MovX ||
          (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
           I->getOpcode() != AMDGPU::COPY) ||
          I->getOperand(0).getReg() != X ||
          I->getOperand(0).getSubReg() != Xsub) {
        MovX = nullptr;
        break;
      }
      MovX = &*I;
    }

    if (!MovX || I == E)
      continue;

    LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);

    for (unsigned I = 0; I < Size; ++I) {
      TargetInstrInfo::RegSubRegPair X1, Y1;
      X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI);
      Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI);
      BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(),
              TII->get(AMDGPU::V_SWAP_B32))
        .addDef(X1.Reg, 0, X1.SubReg)
        .addDef(Y1.Reg, 0, Y1.SubReg)
        .addReg(Y1.Reg, 0, Y1.SubReg)
        .addReg(X1.Reg, 0, X1.SubReg).getInstr();
    }
    MovX->eraseFromParent();
    MovY.eraseFromParent();
    MachineInstr *Next = &*std::next(MovT.getIterator());
    if (MRI.use_nodbg_empty(T))
      MovT.eraseFromParent();
    else
      Xop.setIsKill(false);

    return Next;
  }

  return nullptr;
}

bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  std::vector<unsigned> I1Defs;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
        // If this has a literal constant source that is the same as the
        // reversed bits of an inline immediate, replace with a bitreverse of
        // that constant. This saves 4 bytes in the common case of materializing
        // sign bits.

        // Test if we are after regalloc. We only want to do this after any
        // optimizations happen because this will confuse them.
        // XXX - not exactly a check for post-regalloc run.
        MachineOperand &Src = MI.getOperand(1);
        if (Src.isImm() &&
            TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
          int32_t ReverseImm;
          if (isReverseInlineImm(TII, Src, ReverseImm)) {
            MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
            Src.setImm(ReverseImm);
            continue;
          }
        }
      }

      if (ST.hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
                           MI.getOpcode() == AMDGPU::COPY)) {
        if (auto *NextMI = matchSwap(MI, MRI, TII)) {
          Next = NextMI->getIterator();
          continue;
        }
      }

      // Combine adjacent s_nops to use the immediate operand encoding how long
      // to wait.
      //
      // s_nop N
      // s_nop M
      // =>
      // s_nop (N + M)
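      //
      // For instance, two adjacent "s_nop 0" (each waiting one cycle) merge
      // into a single "s_nop 1", which waits the same two cycles in half the
      // space; the +1/-1 below converts between operand values and cycle
      // counts.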
      if (MI.getOpcode() == AMDGPU::S_NOP &&
          Next != MBB.end() &&
          (*Next).getOpcode() == AMDGPU::S_NOP) {

        MachineInstr &NextMI = *Next;
        // The instruction encodes the amount to wait with an offset of 1,
        // i.e. 0 is wait 1 cycle. Convert both to cycles and then convert back
        // after adding.
        uint8_t Nop0 = MI.getOperand(0).getImm() + 1;
        uint8_t Nop1 = NextMI.getOperand(0).getImm() + 1;

        // Make sure we don't overflow the bounds.
        if (Nop0 + Nop1 <= 8) {
          NextMI.getOperand(0).setImm(Nop0 + Nop1 - 1);
          MI.eraseFromParent();
        }

        continue;
      }

      // FIXME: We also need to consider movs of constant operands since
      // immediate operands are not folded if they have more than one use, and
      // the operand folding pass is unaware if the immediate will be free since
      // it won't know if the src == dest constraint will end up being
      // satisfied.
      if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
          MI.getOpcode() == AMDGPU::S_MUL_I32) {
        const MachineOperand *Dest = &MI.getOperand(0);
        MachineOperand *Src0 = &MI.getOperand(1);
        MachineOperand *Src1 = &MI.getOperand(2);

        if (!Src0->isReg() && Src1->isReg()) {
          if (TII->commuteInstruction(MI, false, 1, 2))
            std::swap(Src0, Src1);
        }

        // FIXME: This could work better if hints worked with subregisters. If
        // we have a vector add of a constant, we usually don't get the correct
        // allocation due to the subregister usage.
        if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) &&
            Src0->isReg()) {
          MRI.setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
          MRI.setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
          continue;
        }

        if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
          if (Src1->isImm() && isKImmOperand(TII, *Src1)) {
            unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
              AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;

            MI.setDesc(TII->get(Opc));
            MI.tieOperands(0, 1);
          }
        }
      }

      // Try to use s_cmpk_*
      if (MI.isCompare() && TII->isSOPC(MI)) {
        shrinkScalarCompare(TII, MI);
        continue;
      }

      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
        const MachineOperand &Dst = MI.getOperand(0);
        MachineOperand &Src = MI.getOperand(1);

        if (Src.isImm() &&
            TargetRegisterInfo::isPhysicalRegister(Dst.getReg())) {
          int32_t ReverseImm;
          if (isKImmOperand(TII, Src))
            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
          else if (isReverseInlineImm(TII, Src, ReverseImm)) {
            MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
            Src.setImm(ReverseImm);
          }
        }

        continue;
      }
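
      // Illustrative rewrites from the two blocks above:
      //   s_add_i32 s0, s0, 0x1234    ->  s_addk_i32 s0, 0x1234
      //   s_mov_b32 s0, 0x1234        ->  s_movk_i32 s0, 0x1234
      //   s_mov_b32 s0, 0x80000000    ->  s_brev_b32 s0, 1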

      // Shrink scalar logic operations.
      if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
          MI.getOpcode() == AMDGPU::S_OR_B32 ||
          MI.getOpcode() == AMDGPU::S_XOR_B32) {
        if (shrinkScalarLogicOp(ST, MRI, TII, MI))
          continue;
      }

      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      if (!TII->canShrink(MI, MRI)) {
        // Try commuting the instruction and see if that enables us to shrink
        // it.
        if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
            !TII->canShrink(MI, MRI))
          continue;
      }

      // getVOPe32 could be -1 here if we started with an instruction that had
      // a 32-bit encoding and then commuted it to an instruction that did not.
      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

      if (TII->isVOPC(Op32)) {
        unsigned DstReg = MI.getOperand(0).getReg();
        if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
          // VOPC instructions can only write to the VCC register. We can't
          // force them to use VCC here, because this is only one register and
          // cannot deal with sequences which would require multiple copies of
          // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
          //
          // So, instead of forcing the instruction to write to VCC, we provide
          // a hint to the register allocator to use VCC and then we will run
          // this pass again after RA and shrink it if it outputs to VCC.
          MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
          continue;
        }
        if (DstReg != AMDGPU::VCC)
          continue;
      }

      if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
        // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
        // instructions.
        const MachineOperand *Src2 =
            TII->getNamedOperand(MI, AMDGPU::OpName::src2);
        if (!Src2->isReg())
          continue;
        unsigned SReg = Src2->getReg();
        if (TargetRegisterInfo::isVirtualRegister(SReg)) {
          MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
          continue;
        }
        if (SReg != AMDGPU::VCC)
          continue;
      }

      // Check for the bool flag output for instructions like V_ADD_I32_e64.
      const MachineOperand *SDst = TII->getNamedOperand(MI,
                                                        AMDGPU::OpName::sdst);

      // Check the carry-in operand for v_addc_u32_e64.
      const MachineOperand *Src2 = TII->getNamedOperand(MI,
                                                        AMDGPU::OpName::src2);

      if (SDst) {
        if (SDst->getReg() != AMDGPU::VCC) {
          if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
            MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
          continue;
        }

        // All of the instructions with carry outs also have an SGPR input in
        // src2.
        if (Src2 && Src2->getReg() != AMDGPU::VCC) {
          if (TargetRegisterInfo::isVirtualRegister(Src2->getReg()))
            MRI.setRegAllocationHint(Src2->getReg(), 0, AMDGPU::VCC);

          continue;
        }
      }

      // We can shrink this instruction
      LLVM_DEBUG(dbgs() << "Shrinking " << MI);

      MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
      ++NumInstructionsShrunk;

      // Copy extra operands not present in the instruction definition.
      copyExtraImplicitOps(*Inst32, MF, MI);

      MI.eraseFromParent();
      foldImmediates(*Inst32, TII, MRI);

      LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
    }
  }
  return false;
}