LLVM 8.0.1
SIFoldOperands.cpp
1 //===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 /// \file
9 //===----------------------------------------------------------------------===//
10 //
11 
12 #include "AMDGPU.h"
13 #include "AMDGPUSubtarget.h"
14 #include "SIInstrInfo.h"
15 #include "SIMachineFunctionInfo.h"
16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17 #include "llvm/ADT/DepthFirstIterator.h"
18 #include "llvm/CodeGen/LiveIntervals.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include "llvm/Target/TargetMachine.h"
25 
26 #define DEBUG_TYPE "si-fold-operands"
27 using namespace llvm;
28 
29 namespace {
30 
31 struct FoldCandidate {
32  MachineInstr *UseMI;
33  union {
34  MachineOperand *OpToFold;
35  uint64_t ImmToFold;
36  int FrameIndexToFold;
37  };
38  int ShrinkOpcode;
39  unsigned char UseOpNo;
40  MachineOperand::MachineOperandType Kind;
41  bool Commuted;
42 
43  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
44  bool Commuted_ = false,
45  int ShrinkOp = -1) :
46  UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
47  Kind(FoldOp->getType()),
48  Commuted(Commuted_) {
49  if (FoldOp->isImm()) {
50  ImmToFold = FoldOp->getImm();
51  } else if (FoldOp->isFI()) {
52  FrameIndexToFold = FoldOp->getIndex();
53  } else {
54  assert(FoldOp->isReg());
55  OpToFold = FoldOp;
56  }
57  }
58 
59  bool isFI() const {
60  return Kind == MachineOperand::MO_FrameIndex;
61  }
62 
63  bool isImm() const {
64  return Kind == MachineOperand::MO_Immediate;
65  }
66 
67  bool isReg() const {
68  return Kind == MachineOperand::MO_Register;
69  }
70 
71  bool isCommuted() const {
72  return Commuted;
73  }
74 
75  bool needsShrink() const {
76  return ShrinkOpcode != -1;
77  }
78 
79  int getShrinkOpcode() const {
80  return ShrinkOpcode;
81  }
82 };
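// A minimal usage sketch (hypothetical operands, not taken from this file):
// the constructor above discriminates the union by the operand kind, so a
// candidate built from an immediate keeps only the raw value, while a
// register operand keeps the MachineOperand pointer:
//
//   FoldCandidate ImmFC(UseMI, /*OpNo=*/1, &MovSrc);   // MovSrc.isImm()
//   assert(ImmFC.isImm() && ImmFC.ImmToFold == (uint64_t)MovSrc.getImm());
//
//   FoldCandidate RegFC(UseMI, /*OpNo=*/2, &CopySrc);  // CopySrc.isReg()
//   assert(RegFC.isReg() && RegFC.OpToFold == &CopySrc);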
83 
84 class SIFoldOperands : public MachineFunctionPass {
85 public:
86  static char ID;
87  MachineRegisterInfo *MRI;
88  const SIInstrInfo *TII;
89  const SIRegisterInfo *TRI;
90  const GCNSubtarget *ST;
91 
92  void foldOperand(MachineOperand &OpToFold,
93  MachineInstr *UseMI,
94  unsigned UseOpIdx,
95  SmallVectorImpl<FoldCandidate> &FoldList,
96  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
97 
98  void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
99 
100  const MachineOperand *isClamp(const MachineInstr &MI) const;
101  bool tryFoldClamp(MachineInstr &MI);
102 
103  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
104  bool tryFoldOMod(MachineInstr &MI);
105 
106 public:
107  SIFoldOperands() : MachineFunctionPass(ID) {
108  initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
109  }
110 
111  bool runOnMachineFunction(MachineFunction &MF) override;
112 
113  StringRef getPassName() const override { return "SI Fold Operands"; }
114 
115  void getAnalysisUsage(AnalysisUsage &AU) const override {
116  AU.setPreservesCFG();
117  MachineFunctionPass::getAnalysisUsage(AU);
118  }
119 };
120 
121 } // End anonymous namespace.
122 
123 INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
124  "SI Fold Operands", false, false)
125 
126 char SIFoldOperands::ID = 0;
127 
128 char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
129 
130 // Wrapper around isInlineConstant that understands special cases when
131 // instruction types are replaced during operand folding.
132 static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
133  const MachineInstr &UseMI,
134  unsigned OpNo,
135  const MachineOperand &OpToFold) {
136  if (TII->isInlineConstant(UseMI, OpNo, OpToFold))
137  return true;
138 
139  unsigned Opc = UseMI.getOpcode();
140  switch (Opc) {
141  case AMDGPU::V_MAC_F32_e64:
142  case AMDGPU::V_MAC_F16_e64:
143  case AMDGPU::V_FMAC_F32_e64: {
144  // Special case for mac. Since this is replaced with mad when folded into
145  // src2, we need to check the legality for the final instruction.
146  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
147  if (static_cast<int>(OpNo) == Src2Idx) {
148  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
149  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
150 
151  unsigned Opc = IsFMA ?
152  AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
153  const MCInstrDesc &MadDesc = TII->get(Opc);
154  return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
155  }
156  return false;
157  }
158  default:
159  return false;
160  }
161 }
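// Usage sketch (mirroring the call site in foldInstOperand below): this helper
// is queried instead of TII->isInlineConstant() directly, so a constant that
// only becomes inline-legal once a mac is rewritten to mad/fma is still folded:
//
//   if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold))
//     foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);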
162 
163 FunctionPass *llvm::createSIFoldOperandsPass() {
164  return new SIFoldOperands();
165 }
166 
167 static bool updateOperand(FoldCandidate &Fold,
168  const SIInstrInfo &TII,
169  const TargetRegisterInfo &TRI) {
170  MachineInstr *MI = Fold.UseMI;
171  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
172  assert(Old.isReg());
173 
174  if (Fold.isImm()) {
175  if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked) {
176  // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
177  // already set.
178  unsigned Opcode = MI->getOpcode();
179  int OpNo = MI->getOperandNo(&Old);
180  int ModIdx = -1;
181  if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
182  ModIdx = AMDGPU::OpName::src0_modifiers;
183  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
184  ModIdx = AMDGPU::OpName::src1_modifiers;
185  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
186  ModIdx = AMDGPU::OpName::src2_modifiers;
187  assert(ModIdx != -1);
188  ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
189  MachineOperand &Mod = MI->getOperand(ModIdx);
190  unsigned Val = Mod.getImm();
191  if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
192  return false;
193  // If upper part is all zero we do not need op_sel_hi.
194  if (!isUInt<16>(Fold.ImmToFold)) {
195  if (!(Fold.ImmToFold & 0xffff)) {
196  Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
197  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
198  Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
199  return true;
200  }
201  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
202  }
203  }
204  }
205 
206  if ((Fold.isImm() || Fold.isFI()) && Fold.needsShrink()) {
207  MachineBasicBlock *MBB = MI->getParent();
208  auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
209  if (Liveness != MachineBasicBlock::LQR_Dead)
210  return false;
211 
212  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
213  int Op32 = Fold.getShrinkOpcode();
214  MachineOperand &Dst0 = MI->getOperand(0);
215  MachineOperand &Dst1 = MI->getOperand(1);
216  assert(Dst0.isDef() && Dst1.isDef());
217 
218  bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
219 
220  const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
221  unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
222 
223  MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
224 
225  if (HaveNonDbgCarryUse) {
226  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
227  .addReg(AMDGPU::VCC, RegState::Kill);
228  }
229 
230  // Keep the old instruction around to avoid breaking iterators, but
231  // replace it with a dummy instruction to remove uses.
232  //
233  // FIXME: We should not invert how this pass looks at operands to avoid
234  // this. Should track set of foldable movs instead of looking for uses
235  // when looking at a use.
236  Dst0.setReg(NewReg0);
237  for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
238  MI->RemoveOperand(I);
239  MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
240 
241  if (Fold.isCommuted())
242  TII.commuteInstruction(*Inst32, false);
243  return true;
244  }
245 
246  assert(!Fold.needsShrink() && "not handled");
247 
248  if (Fold.isImm()) {
249  Old.ChangeToImmediate(Fold.ImmToFold);
250  return true;
251  }
252 
253  if (Fold.isFI()) {
254  Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
255  return true;
256  }
257 
258  MachineOperand *New = Fold.OpToFold;
259  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
260  TargetRegisterInfo::isVirtualRegister(New->getReg())) {
261  Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
262 
263  Old.setIsUndef(New->isUndef());
264  return true;
265  }
266 
267  // FIXME: Handle physical registers.
268 
269  return false;
270 }
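// Net effect of the plain immediate case above (hypothetical MIR, virtual
// register numbers made up): the use of the materialized constant is replaced
// by the constant itself, e.g.
//
//   %1:sgpr_32 = S_MOV_B32 42
//   %2:sgpr_32 = S_ADD_I32 %0, %1, implicit-def $scc
// becomes
//   %2:sgpr_32 = S_ADD_I32 %0, 42, implicit-def $scc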
271 
272 static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
273  const MachineInstr *MI) {
274  for (auto Candidate : FoldList) {
275  if (Candidate.UseMI == MI)
276  return true;
277  }
278  return false;
279 }
280 
281 static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
282  MachineInstr *MI, unsigned OpNo,
283  MachineOperand *OpToFold,
284  const SIInstrInfo *TII) {
285  if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
286 
287  // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
288  unsigned Opc = MI->getOpcode();
289  if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
290  Opc == AMDGPU::V_FMAC_F32_e64) &&
291  (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
292  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
293  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
294  unsigned NewOpc = IsFMA ?
295  AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
296 
297  // Check if changing this to a v_mad_{f16, f32} instruction will allow us
298  // to fold the operand.
299  MI->setDesc(TII->get(NewOpc));
300  bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
301  if (FoldAsMAD) {
302  MI->untieRegOperand(OpNo);
303  return true;
304  }
305  MI->setDesc(TII->get(Opc));
306  }
307 
308  // Special case for s_setreg_b32
309  if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) {
310  MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
311  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
312  return true;
313  }
314 
315  // If we are already folding into another operand of MI, then
316  // we can't commute the instruction, otherwise we risk making the
317  // other fold illegal.
318  if (isUseMIInFoldList(FoldList, MI))
319  return false;
320 
321  unsigned CommuteOpNo = OpNo;
322 
323  // Operand is not legal, so try to commute the instruction to
324  // see if this makes it possible to fold.
325  unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
326  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
327  bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);
328 
329  if (CanCommute) {
330  if (CommuteIdx0 == OpNo)
331  CommuteOpNo = CommuteIdx1;
332  else if (CommuteIdx1 == OpNo)
333  CommuteOpNo = CommuteIdx0;
334  }
335 
336 
337  // One of the operands might be an Imm operand, and OpNo may refer to it after
338  // the call of commuteInstruction() below. Such situations are avoided
339  // here explicitly as OpNo must be a register operand to be a candidate
340  // for memory folding.
341  if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
342  !MI->getOperand(CommuteIdx1).isReg()))
343  return false;
344 
345  if (!CanCommute ||
346  !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
347  return false;
348 
349  if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
350  if ((Opc == AMDGPU::V_ADD_I32_e64 ||
351  Opc == AMDGPU::V_SUB_I32_e64 ||
352  Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
353  (OpToFold->isImm() || OpToFold->isFI())) {
354  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
355 
356  // Verify the other operand is a VGPR, otherwise we would violate the
357  // constant bus restriction.
358  unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
359  MachineOperand &OtherOp = MI->getOperand(OtherIdx);
360  if (!OtherOp.isReg() ||
361  !TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg()))
362  return false;
363 
364  assert(MI->getOperand(1).isDef());
365 
366  // Make sure to get the 32-bit version of the commuted opcode.
367  unsigned MaybeCommutedOpc = MI->getOpcode();
368  int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
369 
370  FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
371  Op32));
372  return true;
373  }
374 
375  TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
376  return false;
377  }
378 
379  FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
380  return true;
381  }
382 
383  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
384  return true;
385 }
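// Callers only record candidates here; nothing is rewritten until
// updateOperand() runs over FoldList. A typical call (as used in foldOperand
// below) looks like:
//
//   SmallVector<FoldCandidate, 4> FoldList;
//   if (tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII)) {
//     // Candidate accepted, possibly after commuting UseMI or rewriting a
//     // mac to mad; the fold itself is applied later by updateOperand().
//   }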
386 
387 // If the use operand doesn't care about the value, this may be an operand only
388 // used for register indexing, in which case it is unsafe to fold.
389 static bool isUseSafeToFold(const SIInstrInfo *TII,
390  const MachineInstr &MI,
391  const MachineOperand &UseMO) {
392  return !UseMO.isUndef() && !TII->isSDWA(MI);
393  //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
394 }
395 
396 void SIFoldOperands::foldOperand(
397  MachineOperand &OpToFold,
398  MachineInstr *UseMI,
399  unsigned UseOpIdx,
400  SmallVectorImpl<FoldCandidate> &FoldList,
401  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
402  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
403 
404  if (!isUseSafeToFold(TII, *UseMI, UseOp))
405  return;
406 
407  // FIXME: Fold operands with subregs.
408  if (UseOp.isReg() && OpToFold.isReg()) {
409  if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
410  return;
411 
412  // Don't fold subregister extracts into tied operands; only fold a full
413  // copy, since a subregister use tied to a full register def doesn't really
414  // make sense. e.g. don't fold:
415  //
416  // %1 = COPY %0:sub1
417  // %2<tied3> = V_MAC_{F16, F32} %3, %4, %1<tied0>
418  //
419  // into
420  // %2<tied3> = V_MAC_{F16, F32} %3, %4, %0:sub1<tied0>
421  if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
422  return;
423  }
424 
425  // Special case for REG_SEQUENCE: We can't fold literals into
426  // REG_SEQUENCE instructions, so we have to fold them into the
427  // uses of REG_SEQUENCE.
428  if (UseMI->isRegSequence()) {
429  unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
430  unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
431 
432  for (MachineRegisterInfo::use_iterator
433  RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
434  RSUse != RSE; ++RSUse) {
435 
436  MachineInstr *RSUseMI = RSUse->getParent();
437  if (RSUse->getSubReg() != RegSeqDstSubReg)
438  continue;
439 
440  foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
441  CopiesToReplace);
442  }
443 
444  return;
445  }
446 
447 
448  bool FoldingImm = OpToFold.isImm();
449 
450  if (FoldingImm && UseMI->isCopy()) {
451  unsigned DestReg = UseMI->getOperand(0).getReg();
452  const TargetRegisterClass *DestRC
453  = TargetRegisterInfo::isVirtualRegister(DestReg) ?
454  MRI->getRegClass(DestReg) :
455  TRI->getPhysRegClass(DestReg);
456 
457  unsigned SrcReg = UseMI->getOperand(1).getReg();
458  if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
459  TargetRegisterInfo::isVirtualRegister(SrcReg)) {
460  const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg);
461  if (TRI->isSGPRClass(SrcRC) && TRI->hasVGPRs(DestRC)) {
462  MachineRegisterInfo::use_iterator NextUse;
463  SmallVector<FoldCandidate, 4> CopyUses;
464  for (MachineRegisterInfo::use_iterator
465  Use = MRI->use_begin(DestReg), E = MRI->use_end();
466  Use != E; Use = NextUse) {
467  NextUse = std::next(Use);
468  FoldCandidate FC = FoldCandidate(Use->getParent(),
469  Use.getOperandNo(), &UseMI->getOperand(1));
470  CopyUses.push_back(FC);
471  }
472  for (auto & F : CopyUses) {
473  foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo,
474  FoldList, CopiesToReplace);
475  }
476  }
477  }
478 
479  // In order to fold immediates into copies, we need to change the
480  // copy to a MOV.
481 
482  unsigned MovOp = TII->getMovOpcode(DestRC);
483  if (MovOp == AMDGPU::COPY)
484  return;
485 
486  UseMI->setDesc(TII->get(MovOp));
487  CopiesToReplace.push_back(UseMI);
488  } else {
489  if (UseMI->isCopy() && OpToFold.isReg() &&
490  TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(0).getReg()) &&
491  TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(1).getReg()) &&
492  TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
493  TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()) &&
494  !UseMI->getOperand(1).getSubReg()) {
495  UseMI->getOperand(1).setReg(OpToFold.getReg());
496  UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
497  UseMI->getOperand(1).setIsKill(false);
498  CopiesToReplace.push_back(UseMI);
499  OpToFold.setIsKill(false);
500  return;
501  }
502 
503  const MCInstrDesc &UseDesc = UseMI->getDesc();
504 
505  // Don't fold into target independent nodes. Target independent opcodes
506  // don't have defined register classes.
507  if (UseDesc.isVariadic() ||
508  UseOp.isImplicit() ||
509  UseDesc.OpInfo[UseOpIdx].RegClass == -1)
510  return;
511  }
512 
513  if (!FoldingImm) {
514  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
515 
516  // FIXME: We could try to change the instruction from 64-bit to 32-bit
517  // to enable more folding opportunities. The shrink operands pass
518  // already does this.
519  return;
520  }
521 
522 
523  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
524  const TargetRegisterClass *FoldRC =
525  TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
526 
527 
528  // Split 64-bit constants into 32-bits for folding.
529  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
530  unsigned UseReg = UseOp.getReg();
531  const TargetRegisterClass *UseRC
532  = TargetRegisterInfo::isVirtualRegister(UseReg) ?
533  MRI->getRegClass(UseReg) :
534  TRI->getPhysRegClass(UseReg);
535 
536  if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
537  return;
538 
539  APInt Imm(64, OpToFold.getImm());
540  if (UseOp.getSubReg() == AMDGPU::sub0) {
541  Imm = Imm.getLoBits(32);
542  } else {
543  assert(UseOp.getSubReg() == AMDGPU::sub1);
544  Imm = Imm.getHiBits(32);
545  }
546 
547  MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
548  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
549  return;
550  }
551 
552 
553 
554  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
555 }
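// Example for the 64-bit split above (hypothetical MIR): sub-register uses of
// a materialized 64-bit constant fold the matching 32-bit half:
//
//   %1:sreg_64 = S_MOV_B64 <64-bit imm>
//   ... = ... %1.sub0 ...            ; candidate immediate = low 32 bits
//   ... = ... %1.sub1 ...            ; candidate immediate = high 32 bits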
556 
557 static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
558  uint32_t LHS, uint32_t RHS) {
559  switch (Opcode) {
560  case AMDGPU::V_AND_B32_e64:
561  case AMDGPU::V_AND_B32_e32:
562  case AMDGPU::S_AND_B32:
563  Result = LHS & RHS;
564  return true;
565  case AMDGPU::V_OR_B32_e64:
566  case AMDGPU::V_OR_B32_e32:
567  case AMDGPU::S_OR_B32:
568  Result = LHS | RHS;
569  return true;
570  case AMDGPU::V_XOR_B32_e64:
571  case AMDGPU::V_XOR_B32_e32:
572  case AMDGPU::S_XOR_B32:
573  Result = LHS ^ RHS;
574  return true;
575  case AMDGPU::V_LSHL_B32_e64:
576  case AMDGPU::V_LSHL_B32_e32:
577  case AMDGPU::S_LSHL_B32:
578  // The instruction ignores the high bits for out of bounds shifts.
579  Result = LHS << (RHS & 31);
580  return true;
581  case AMDGPU::V_LSHLREV_B32_e64:
582  case AMDGPU::V_LSHLREV_B32_e32:
583  Result = RHS << (LHS & 31);
584  return true;
585  case AMDGPU::V_LSHR_B32_e64:
586  case AMDGPU::V_LSHR_B32_e32:
587  case AMDGPU::S_LSHR_B32:
588  Result = LHS >> (RHS & 31);
589  return true;
590  case AMDGPU::V_LSHRREV_B32_e64:
591  case AMDGPU::V_LSHRREV_B32_e32:
592  Result = RHS >> (LHS & 31);
593  return true;
594  case AMDGPU::V_ASHR_I32_e64:
595  case AMDGPU::V_ASHR_I32_e32:
596  case AMDGPU::S_ASHR_I32:
597  Result = static_cast<int32_t>(LHS) >> (RHS & 31);
598  return true;
599  case AMDGPU::V_ASHRREV_I32_e64:
600  case AMDGPU::V_ASHRREV_I32_e32:
601  Result = static_cast<int32_t>(RHS) >> (LHS & 31);
602  return true;
603  default:
604  return false;
605  }
606 }
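// Worked example: evalBinaryInstruction(AMDGPU::V_LSHLREV_B32_e32, Result,
// /*LHS=*/33, /*RHS=*/7) yields Result = 7 << (33 & 31) = 14, matching the
// hardware behaviour for out-of-range shift amounts noted above.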
607 
608 static unsigned getMovOpc(bool IsScalar) {
609  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
610 }
611 
612 /// Remove any leftover implicit operands from mutating the instruction. e.g.
613 /// if we replace an s_and_b32 with a copy, we don't need the implicit scc def
614 /// anymore.
615 static void stripExtraCopyOperands(MachineInstr &MI) {
616  const MCInstrDesc &Desc = MI.getDesc();
617  unsigned NumOps = Desc.getNumOperands() +
618  Desc.getNumImplicitUses() +
619  Desc.getNumImplicitDefs();
620 
621  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
622  MI.RemoveOperand(I);
623 }
624 
625 static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
626  MI.setDesc(NewDesc);
627  stripExtraCopyOperands(MI);
628 }
629 
630 static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI,
631  MachineOperand &Op) {
632  if (Op.isReg()) {
633  // If this has a subregister, it obviously is a register source.
634  if (Op.getSubReg() != AMDGPU::NoSubRegister ||
635  !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
636  return &Op;
637 
638  MachineInstr *Def = MRI.getVRegDef(Op.getReg());
639  if (Def && Def->isMoveImmediate()) {
640  MachineOperand &ImmSrc = Def->getOperand(1);
641  if (ImmSrc.isImm())
642  return &ImmSrc;
643  }
644  }
645 
646  return &Op;
647 }
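// Usage sketch (as in tryConstantFoldOp below): the caller gets back either
// the original operand or the immediate operand of the defining move:
//
//   MachineOperand *Src0 = getImmOrMaterializedImm(MRI, MI->getOperand(Src0Idx));
//   if (Src0->isImm())
//     ...  // safe to constant fold with Src0->getImm()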
648 
649 // Try to simplify operations with a constant that may appear after instruction
650 // selection.
651 // TODO: See if a frame index with a fixed offset can fold.
652 static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
653  const SIInstrInfo *TII,
654  MachineInstr *MI,
655  MachineOperand *ImmOp) {
656  unsigned Opc = MI->getOpcode();
657  if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
658  Opc == AMDGPU::S_NOT_B32) {
659  MI->getOperand(1).ChangeToImmediate(~ImmOp->getImm());
660  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
661  return true;
662  }
663 
664  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
665  if (Src1Idx == -1)
666  return false;
667 
668  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
669  MachineOperand *Src0 = getImmOrMaterializedImm(MRI, MI->getOperand(Src0Idx));
670  MachineOperand *Src1 = getImmOrMaterializedImm(MRI, MI->getOperand(Src1Idx));
671 
672  if (!Src0->isImm() && !Src1->isImm())
673  return false;
674 
675  if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32) {
676  if (Src0->isImm() && Src0->getImm() == 0) {
677  // v_lshl_or_b32 0, X, Y -> copy Y
678  // v_lshl_or_b32 0, X, K -> v_mov_b32 K
679  bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
680  MI->RemoveOperand(Src1Idx);
681  MI->RemoveOperand(Src0Idx);
682 
683  MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
684  return true;
685  }
686  }
687 
688  // and k0, k1 -> v_mov_b32 (k0 & k1)
689  // or k0, k1 -> v_mov_b32 (k0 | k1)
690  // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
691  if (Src0->isImm() && Src1->isImm()) {
692  int32_t NewImm;
693  if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
694  return false;
695 
696  const SIRegisterInfo &TRI = TII->getRegisterInfo();
697  bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg());
698 
699  // Be careful to change the right operand, src0 may belong to a different
700  // instruction.
701  MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
702  MI->RemoveOperand(Src1Idx);
703  mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
704  return true;
705  }
706 
707  if (!MI->isCommutable())
708  return false;
709 
710  if (Src0->isImm() && !Src1->isImm()) {
711  std::swap(Src0, Src1);
712  std::swap(Src0Idx, Src1Idx);
713  }
714 
715  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
716  if (Opc == AMDGPU::V_OR_B32_e64 ||
717  Opc == AMDGPU::V_OR_B32_e32 ||
718  Opc == AMDGPU::S_OR_B32) {
719  if (Src1Val == 0) {
720  // y = or x, 0 => y = copy x
721  MI->RemoveOperand(Src1Idx);
722  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
723  } else if (Src1Val == -1) {
724  // y = or x, -1 => y = v_mov_b32 -1
725  MI->RemoveOperand(Src1Idx);
726  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
727  } else
728  return false;
729 
730  return true;
731  }
732 
733  if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
734  MI->getOpcode() == AMDGPU::V_AND_B32_e32 ||
735  MI->getOpcode() == AMDGPU::S_AND_B32) {
736  if (Src1Val == 0) {
737  // y = and x, 0 => y = v_mov_b32 0
738  MI->RemoveOperand(Src0Idx);
739  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
740  } else if (Src1Val == -1) {
741  // y = and x, -1 => y = copy x
742  MI->RemoveOperand(Src1Idx);
743  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
744  stripExtraCopyOperands(*MI);
745  } else
746  return false;
747 
748  return true;
749  }
750 
751  if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
752  MI->getOpcode() == AMDGPU::V_XOR_B32_e32 ||
753  MI->getOpcode() == AMDGPU::S_XOR_B32) {
754  if (Src1Val == 0) {
755  // y = xor x, 0 => y = copy x
756  MI->RemoveOperand(Src1Idx);
757  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
758  return true;
759  }
760  }
761 
762  return false;
763 }
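// Worked example (hypothetical MIR): with both sources materialized constants,
//
//   %1:vgpr_32 = V_MOV_B32_e32 0xff00, implicit $exec
//   %2:vgpr_32 = V_MOV_B32_e32 0x00f0, implicit $exec
//   %3:vgpr_32 = V_OR_B32_e32 %1, %2, implicit $exec
//
// evalBinaryInstruction() gives 0xff00 | 0x00f0 = 0xfff0 and the OR is
// rewritten to %3:vgpr_32 = V_MOV_B32_e32 0xfff0, implicit $exec.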
764 
765 // Try to fold an instruction into a simpler one
766 static bool tryFoldInst(const SIInstrInfo *TII,
767  MachineInstr *MI) {
768  unsigned Opc = MI->getOpcode();
769 
770  if (Opc == AMDGPU::V_CNDMASK_B32_e32 ||
771  Opc == AMDGPU::V_CNDMASK_B32_e64 ||
772  Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
773  const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
774  const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
775  if (Src1->isIdenticalTo(*Src0)) {
776  LLVM_DEBUG(dbgs() << "Folded " << *MI << " into ");
777  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
778  if (Src2Idx != -1)
779  MI->RemoveOperand(Src2Idx);
780  MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
781  mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY
782  : getMovOpc(false)));
783  LLVM_DEBUG(dbgs() << *MI << '\n');
784  return true;
785  }
786  }
787 
788  return false;
789 }
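// Example (hypothetical MIR, VOP3 operand order abbreviated): a cndmask whose
// two value sources are identical no longer depends on the condition, so
//
//   %3 = V_CNDMASK_B32_e64 0, %1, 0, %1, %2, implicit $exec
// becomes
//   %3 = COPY %1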
790 
791 void SIFoldOperands::foldInstOperand(MachineInstr &MI,
792  MachineOperand &OpToFold) const {
793  // We need to mutate the operands of new mov instructions to add implicit
794  // uses of EXEC, but adding them invalidates the use_iterator, so defer
795  // this.
796  SmallVector<MachineInstr *, 4> CopiesToReplace;
797  SmallVector<FoldCandidate, 4> FoldList;
798  MachineOperand &Dst = MI.getOperand(0);
799 
800  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
801  if (FoldingImm) {
802  unsigned NumLiteralUses = 0;
803  MachineOperand *NonInlineUse = nullptr;
804  int NonInlineUseOpNo = -1;
805 
806  MachineRegisterInfo::use_iterator NextUse;
807  for (MachineRegisterInfo::use_iterator
808  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
809  Use != E; Use = NextUse) {
810  NextUse = std::next(Use);
811  MachineInstr *UseMI = Use->getParent();
812  unsigned OpNo = Use.getOperandNo();
813 
814  // Folding the immediate may reveal operations that can be constant
815  // folded or replaced with a copy. This can happen for example after
816  // frame indices are lowered to constants or from splitting 64-bit
817  // constants.
818  //
819  // We may also encounter cases where one or both operands are
820  // immediates materialized into a register, which would ordinarily not
821  // be folded due to multiple uses or operand constraints.
822 
823  if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
824  LLVM_DEBUG(dbgs() << "Constant folded " << *UseMI << '\n');
825 
826  // Some constant folding cases change the same immediate's use to a new
827  // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
828  // again. The same constant folded instruction could also have a second
829  // use operand.
830  NextUse = MRI->use_begin(Dst.getReg());
831  FoldList.clear();
832  continue;
833  }
834 
835  // Try to fold any inline immediate uses, and then only fold other
836  // constants if they have one use.
837  //
838  // The legality of the inline immediate must be checked based on the use
839  // operand, not the defining instruction, because 32-bit instructions
840  // with 32-bit inline immediate sources may be used to materialize
841  // constants used in 16-bit operands.
842  //
843  // e.g. it is unsafe to fold:
844  // s_mov_b32 s0, 1.0 // materializes 0x3f800000
845  // v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00
846 
847  // Folding immediates with more than one use will increase program size.
848  // FIXME: This will also reduce register usage, which may be better
849  // in some cases. A better heuristic is needed.
850  if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
851  foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
852  } else {
853  if (++NumLiteralUses == 1) {
854  NonInlineUse = &*Use;
855  NonInlineUseOpNo = OpNo;
856  }
857  }
858  }
859 
860  if (NumLiteralUses == 1) {
861  MachineInstr *UseMI = NonInlineUse->getParent();
862  foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
863  }
864  } else {
865  // Folding register.
866  SmallVector<MachineRegisterInfo::use_iterator, 4> UsesToProcess;
867  for (MachineRegisterInfo::use_iterator
868  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
869  Use != E; ++Use) {
870  UsesToProcess.push_back(Use);
871  }
872  for (auto U : UsesToProcess) {
873  MachineInstr *UseMI = U->getParent();
874 
875  foldOperand(OpToFold, UseMI, U.getOperandNo(),
876  FoldList, CopiesToReplace);
877  }
878  }
879 
880  MachineFunction *MF = MI.getParent()->getParent();
881  // Make sure we add EXEC uses to any new v_mov instructions created.
882  for (MachineInstr *Copy : CopiesToReplace)
883  Copy->addImplicitDefUseOperands(*MF);
884 
885  for (FoldCandidate &Fold : FoldList) {
886  if (updateOperand(Fold, *TII, *TRI)) {
887  // Clear kill flags.
888  if (Fold.isReg()) {
889  assert(Fold.OpToFold && Fold.OpToFold->isReg());
890  // FIXME: Probably shouldn't bother trying to fold if not an
891  // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
892  // copies.
893  MRI->clearKillFlags(Fold.OpToFold->getReg());
894  }
895  LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
896  << static_cast<int>(Fold.UseOpNo) << " of "
897  << *Fold.UseMI << '\n');
898  tryFoldInst(TII, Fold.UseMI);
899  } else if (Fold.isCommuted()) {
900  // Restoring instruction's original operand order if fold has failed.
901  TII->commuteInstruction(*Fold.UseMI, false);
902  }
903  }
904 }
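// Example of the single-literal heuristic above: a non-inline constant such
// as 0x12345678 with several VALU uses stays in its V_MOV_B32 (folding it
// would duplicate the 32-bit literal at every use), while an inline constant
// such as 64 or -4.0 is folded into every use for which it is legal.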
905 
906 // Clamp patterns are canonically selected to v_max_* instructions, so only
907 // handle them.
908 const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
909  unsigned Op = MI.getOpcode();
910  switch (Op) {
911  case AMDGPU::V_MAX_F32_e64:
912  case AMDGPU::V_MAX_F16_e64:
913  case AMDGPU::V_MAX_F64:
914  case AMDGPU::V_PK_MAX_F16: {
915  if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
916  return nullptr;
917 
918  // Make sure sources are identical.
919  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
920  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
921  if (!Src0->isReg() || !Src1->isReg() ||
922  Src0->getReg() != Src1->getReg() ||
923  Src0->getSubReg() != Src1->getSubReg() ||
924  Src0->getSubReg() != AMDGPU::NoSubRegister)
925  return nullptr;
926 
927  // Can't fold up if we have modifiers.
928  if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
929  return nullptr;
930 
931  unsigned Src0Mods
932  = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
933  unsigned Src1Mods
934  = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
935 
936  // Having a 0 op_sel_hi would require swizzling the output in the source
937  // instruction, which we can't do.
938  unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1 : 0;
939  if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
940  return nullptr;
941  return Src0;
942  }
943  default:
944  return nullptr;
945  }
946 }
947 
948 // We obviously have multiple uses in a clamp since the register is used twice
949 // in the same instruction.
950 static bool hasOneNonDBGUseInst(const MachineRegisterInfo &MRI, unsigned Reg) {
951  int Count = 0;
952  for (auto I = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
953  I != E; ++I) {
954  if (++Count > 1)
955  return false;
956  }
957 
958  return true;
959 }
960 
961 // FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
962 bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
963  const MachineOperand *ClampSrc = isClamp(MI);
964  if (!ClampSrc || !hasOneNonDBGUseInst(*MRI, ClampSrc->getReg()))
965  return false;
966 
967  MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
968 
969  // The type of clamp must be compatible.
970  if (TII->getClampMask(*Def) != TII->getClampMask(MI))
971  return false;
972 
973  MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
974  if (!DefClamp)
975  return false;
976 
977  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def
978  << '\n');
979 
980  // Clamp is applied after omod, so it is OK if omod is set.
981  DefClamp->setImm(1);
982  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
983  MI.eraseFromParent();
984  return true;
985 }
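// Example (hypothetical MIR, VOP3 operand order abbreviated): the clamp of the
// max is moved onto the defining instruction and the max goes away:
//
//   %1 = V_ADD_F32_e64 0, %0, 0, %0, 0, 0
//   %2 = V_MAX_F32_e64 0, %1, 0, %1, /*clamp*/ 1, 0
// becomes
//   %1 = V_ADD_F32_e64 0, %0, 0, %0, /*clamp*/ 1, 0   ; uses of %2 now use %1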
986 
987 static int getOModValue(unsigned Opc, int64_t Val) {
988  switch (Opc) {
989  case AMDGPU::V_MUL_F32_e64: {
990  switch (static_cast<uint32_t>(Val)) {
991  case 0x3f000000: // 0.5
992  return SIOutMods::DIV2;
993  case 0x40000000: // 2.0
994  return SIOutMods::MUL2;
995  case 0x40800000: // 4.0
996  return SIOutMods::MUL4;
997  default:
998  return SIOutMods::NONE;
999  }
1000  }
1001  case AMDGPU::V_MUL_F16_e64: {
1002  switch (static_cast<uint16_t>(Val)) {
1003  case 0x3800: // 0.5
1004  return SIOutMods::DIV2;
1005  case 0x4000: // 2.0
1006  return SIOutMods::MUL2;
1007  case 0x4400: // 4.0
1008  return SIOutMods::MUL4;
1009  default:
1010  return SIOutMods::NONE;
1011  }
1012  }
1013  default:
1014  llvm_unreachable("invalid mul opcode");
1015  }
1016 }
1017 
1018 // FIXME: Does this really not support denormals with f16?
1019 // FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
1020 // handled, so will anything other than that break?
1021 std::pair<const MachineOperand *, int>
1022 SIFoldOperands::isOMod(const MachineInstr &MI) const {
1023  unsigned Op = MI.getOpcode();
1024  switch (Op) {
1025  case AMDGPU::V_MUL_F32_e64:
1026  case AMDGPU::V_MUL_F16_e64: {
1027  // If output denormals are enabled, omod is ignored.
1028  if ((Op == AMDGPU::V_MUL_F32_e64 && ST->hasFP32Denormals()) ||
1029  (Op == AMDGPU::V_MUL_F16_e64 && ST->hasFP16Denormals()))
1030  return std::make_pair(nullptr, SIOutMods::NONE);
1031 
1032  const MachineOperand *RegOp = nullptr;
1033  const MachineOperand *ImmOp = nullptr;
1034  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1035  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1036  if (Src0->isImm()) {
1037  ImmOp = Src0;
1038  RegOp = Src1;
1039  } else if (Src1->isImm()) {
1040  ImmOp = Src1;
1041  RegOp = Src0;
1042  } else
1043  return std::make_pair(nullptr, SIOutMods::NONE);
1044 
1045  int OMod = getOModValue(Op, ImmOp->getImm());
1046  if (OMod == SIOutMods::NONE ||
1047  TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
1048  TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
1049  TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
1050  TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
1051  return std::make_pair(nullptr, SIOutMods::NONE);
1052 
1053  return std::make_pair(RegOp, OMod);
1054  }
1055  case AMDGPU::V_ADD_F32_e64:
1056  case AMDGPU::V_ADD_F16_e64: {
1057  // If output denormals are enabled, omod is ignored.
1058  if ((Op == AMDGPU::V_ADD_F32_e64 && ST->hasFP32Denormals()) ||
1059  (Op == AMDGPU::V_ADD_F16_e64 && ST->hasFP16Denormals()))
1060  return std::make_pair(nullptr, SIOutMods::NONE);
1061 
1062  // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
1063  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1064  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1065 
1066  if (Src0->isReg() && Src1->isReg() && Src0->getReg() == Src1->getReg() &&
1067  Src0->getSubReg() == Src1->getSubReg() &&
1068  !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
1069  !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
1070  !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
1071  !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1072  return std::make_pair(Src0, SIOutMods::MUL2);
1073 
1074  return std::make_pair(nullptr, SIOutMods::NONE);
1075  }
1076  default:
1077  return std::make_pair(nullptr, SIOutMods::NONE);
1078  }
1079 }
1080 
1081 // FIXME: Does this need to check IEEE bit on function?
1082 bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
1083  const MachineOperand *RegOp;
1084  int OMod;
1085  std::tie(RegOp, OMod) = isOMod(MI);
1086  if (OMod == SIOutMods::NONE || !RegOp->isReg() ||
1087  RegOp->getSubReg() != AMDGPU::NoSubRegister ||
1088  !hasOneNonDBGUseInst(*MRI, RegOp->getReg()))
1089  return false;
1090 
1091  MachineInstr *Def = MRI->getVRegDef(RegOp->getReg());
1092  MachineOperand *DefOMod = TII->getNamedOperand(*Def, AMDGPU::OpName::omod);
1093  if (!DefOMod || DefOMod->getImm() != SIOutMods::NONE)
1094  return false;
1095 
1096  // Clamp is applied after omod. If the source already has clamp set, don't
1097  // fold it.
1098  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
1099  return false;
1100 
1101  LLVM_DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def << '\n');
1102 
1103  DefOMod->setImm(OMod);
1104  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1105  MI.eraseFromParent();
1106  return true;
1107 }
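// Example (hypothetical MIR): a multiply by 4.0 (0x40800000) becomes an output
// modifier on the defining instruction:
//
//   %1 = V_ADD_F32_e64 0, %a, 0, %b, 0, 0
//   %2 = V_MUL_F32_e64 0, 0x40800000, 0, %1, 0, 0
// becomes
//   %1 = V_ADD_F32_e64 0, %a, 0, %b, 0, /*omod*/ SIOutMods::MUL4
// with uses of %2 replaced by %1.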
1108 
1109 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
1110  if (skipFunction(MF.getFunction()))
1111  return false;
1112 
1113  MRI = &MF.getRegInfo();
1114  ST = &MF.getSubtarget<GCNSubtarget>();
1115  TII = ST->getInstrInfo();
1116  TRI = &TII->getRegisterInfo();
1117 
1118  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1119 
1120  // omod is ignored by hardware if IEEE bit is enabled. omod also does not
1121  // correctly handle signed zeros.
1122  //
1123  bool IsIEEEMode = ST->enableIEEEBit(MF);
1124  bool HasNSZ = MFI->hasNoSignedZerosFPMath();
1125 
1126  for (MachineBasicBlock *MBB : depth_first(&MF)) {
1127  MachineBasicBlock::iterator I, Next;
1128  for (I = MBB->begin(); I != MBB->end(); I = Next) {
1129  Next = std::next(I);
1130  MachineInstr &MI = *I;
1131 
1132  tryFoldInst(TII, &MI);
1133 
1134  if (!TII->isFoldableCopy(MI)) {
1135  // TODO: Omod might be OK if there is NSZ only on the source
1136  // instruction, and not the omod multiply.
1137  if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
1138  !tryFoldOMod(MI))
1139  tryFoldClamp(MI);
1140  continue;
1141  }
1142 
1143  MachineOperand &OpToFold = MI.getOperand(1);
1144  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
1145 
1146  // FIXME: We could also be folding things like TargetIndexes.
1147  if (!FoldingImm && !OpToFold.isReg())
1148  continue;
1149 
1150  if (OpToFold.isReg() &&
1151  !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()))
1152  continue;
1153 
1154  // Prevent folding operands backwards in the function. For example,
1155  // the COPY opcode must not be replaced by 1 in this example:
1156  //
1157  // %3 = COPY %vgpr0; VGPR_32:%3
1158  // ...
1159  // %vgpr0 = V_MOV_B32_e32 1, implicit %exec
1160  MachineOperand &Dst = MI.getOperand(0);
1161  if (Dst.isReg() &&
1162  !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
1163  continue;
1164 
1165  foldInstOperand(MI, OpToFold);
1166  }
1167  }
1168  return false;
1169 }