LLVM  8.0.1
MLxExpansionPass.cpp
Go to the documentation of this file.
1 //===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
11 // multiply and add / sub instructions) when special VMLx hazards are detected.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ARM.h"
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMSubtarget.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/Statistic.h"
26 #include "llvm/Support/Debug.h"
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "mlx-expansion"
31 
// Hidden flag "expand-all-fp-mlx" (default false). When set, FindMLxHazard
// reports a hazard for every candidate it is asked about (after the
// expand-limit check), so every fp MLA / MLS gets expanded.
32 static cl::opt<bool>
33 ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden);
// Hidden flag "expand-limit" (default ~0U). FindMLxHazard stops reporting
// hazards once NumExpand has reached this value.
34 static cl::opt<unsigned>
35 ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden);
36 
// Counter bumped once per expanded instruction in ExpandFPMLxInstruction.
37 STATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded");
38 
// The pass class lives in an anonymous namespace.
39 namespace {
// Machine function pass that rewrites selected fp MLA / MLS instructions into
// a separate multiply followed by an add / sub (see ExpandFPMLxInstruction).
40  struct MLxExpansion : public MachineFunctionPass {
41  static char ID;
42  MLxExpansion() : MachineFunctionPass(ID) {}
43 
// Pass entry point; defined out of line below.
44  bool runOnMachineFunction(MachineFunction &Fn) override;
45 
46  StringRef getPassName() const override {
47  return "ARM MLA / MLS expansion pass";
48  }
49 
50  private:
// Instruction info; assigned in runOnMachineFunction from the subtarget.
51  const ARMBaseInstrInfo *TII;
// Register info; handed to readsRegister() and getRegClass() by the members.
52  const TargetRegisterInfo *TRI;
// NOTE(review): listing lines 53 and 59 are absent from this extract. The
// member functions use `MRI` and `IgnoreStall`, so those members are
// presumably declared on the missing lines - confirm against the full file.
54 
// Subtarget flags cached by runOnMachineFunction. isLikeA9 is also set to
// true when the subtarget is Swift.
55  bool isLikeA9;
56  bool isSwift;
// LastMIs is a 4-entry circular history of the instructions most recently
// pushed by ExpandFPMLxInstructions (entries may be null); MIIdx is the slot
// the next pushStack() call will overwrite.
57  unsigned MIIdx;
58  MachineInstr* LastMIs[4];
60 
// History maintenance.
61  void clearStack();
62  void pushStack(MachineInstr *MI);
// Def / use chain helpers used by the hazard checks.
63  MachineInstr *getAccDefMI(MachineInstr *MI) const;
64  unsigned getDefReg(MachineInstr *MI) const;
65  bool hasLoopHazard(MachineInstr *MI) const;
66  bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
// Hazard decision and the actual rewrite.
67  bool FindMLxHazard(MachineInstr *MI);
68  void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
69  unsigned MulOpc, unsigned AddSubOpc,
70  bool NegAcc, bool HasLane);
71  bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
72  };
73  char MLxExpansion::ID = 0;
74 }
75 
76 void MLxExpansion::clearStack() {
77  std::fill(LastMIs, LastMIs + 4, nullptr);
78  MIIdx = 0;
79 }
80 
81 void MLxExpansion::pushStack(MachineInstr *MI) {
82  LastMIs[MIIdx] = MI;
83  if (++MIIdx == 4)
84  MIIdx = 0;
85 }
86 
// Returns the instruction that defines the accumulator input of MI (operand
// 1), following copy-like and INSERT_SUBREG definitions for as long as the
// defining instruction stays in MI's own basic block. May return nullptr
// (see the early return below).
//
// NOTE(review): listing lines 91, 101 and 107 are absent from this extract,
// which is why the `return nullptr` below looks unconditional and the braces
// do not balance. The missing lines are presumably register-kind guards (the
// page's symbol index lists isPhysicalRegister / isVirtualRegister) - confirm
// against the full file.
87 MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
88  // Look past COPY and INSERT_SUBREG instructions to find the
89  // real definition MI. This is important for _sfp instructions.
90  unsigned Reg = MI->getOperand(1).getReg();
92  return nullptr;
93 
94  MachineBasicBlock *MBB = MI->getParent();
95  MachineInstr *DefMI = MRI->getVRegDef(Reg);
96  while (true) {
// Stop as soon as the chain leaves MI's block; that definition is returned.
97  if (DefMI->getParent() != MBB)
98  break;
// Copy-like: continue from the register read as operand 1.
99  if (DefMI->isCopyLike()) {
100  Reg = DefMI->getOperand(1).getReg();
102  DefMI = MRI->getVRegDef(Reg);
103  continue;
104  }
// INSERT_SUBREG: continue from the register read as operand 2.
105  } else if (DefMI->isInsertSubreg()) {
106  Reg = DefMI->getOperand(2).getReg();
108  DefMI = MRI->getVRegDef(Reg);
109  continue;
110  }
111  }
112  break;
113  }
114  return DefMI;
115 }
116 
// Returns the register that ultimately carries MI's result: starting from
// operand 0, it steps forward through COPY / INSERT_SUBREG users while the
// current register has exactly one non-debug use and that user is in MI's
// basic block. The last register reached is returned.
//
// NOTE(review): listing lines 119 and 130 are absent from this extract; they
// hold the first half of the two `... !MRI->hasOneNonDBGUse(Reg))` conditions
// (presumably a physical-register test) - confirm against the full file.
117 unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
118  unsigned Reg = MI->getOperand(0).getReg();
120  !MRI->hasOneNonDBGUse(Reg))
121  return Reg;
122 
123  MachineBasicBlock *MBB = MI->getParent();
// The single non-debug user of Reg.
124  MachineInstr *UseMI = &*MRI->use_instr_nodbg_begin(Reg);
125  if (UseMI->getParent() != MBB)
126  return Reg;
127 
128  while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
// Move on to the register this COPY / INSERT_SUBREG defines.
129  Reg = UseMI->getOperand(0).getReg();
131  !MRI->hasOneNonDBGUse(Reg))
132  return Reg;
133  UseMI = &*MRI->use_instr_nodbg_begin(Reg);
134  if (UseMI->getParent() != MBB)
135  return Reg;
136  }
137 
138  return Reg;
139 }
140 
141 /// hasLoopHazard - Check whether an MLx instruction is chained to itself across
142 /// a single-MBB loop.
///
/// Walks the definition chain of MI's accumulator input (operand 1) inside
/// MI's own block, looking through PHIs, copy-like instructions and
/// INSERT_SUBREGs, and returns true when the walk ends on MI itself.
///
/// NOTE(review): listing lines 145, 159, 167 and 173 are absent from this
/// extract, which is why the early `return false` looks unconditional and the
/// braces do not balance. The missing lines are presumably register-kind
/// guards - confirm against the full file.
143 bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const {
144  unsigned Reg = MI->getOperand(1).getReg();
146  return false;
147 
148  MachineBasicBlock *MBB = MI->getParent();
149  MachineInstr *DefMI = MRI->getVRegDef(Reg);
150  while (true) {
// Re-entry point used after stepping through a PHI.
151 outer_continue:
// The chain left MI's block: no self-dependence can be proven from here.
152  if (DefMI->getParent() != MBB)
153  break;
154 
155  if (DefMI->isPHI()) {
// PHI operands are visited in pairs: operand i is the incoming register and
// operand i + 1 the block it comes from. Only the value arriving from MBB
// itself is followed.
156  for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
157  if (DefMI->getOperand(i + 1).getMBB() == MBB) {
158  unsigned SrcReg = DefMI->getOperand(i).getReg();
160  DefMI = MRI->getVRegDef(SrcReg);
161  goto outer_continue;
162  }
163  }
164  }
// Copy-like: continue from the register read as operand 1.
165  } else if (DefMI->isCopyLike()) {
166  Reg = DefMI->getOperand(1).getReg();
168  DefMI = MRI->getVRegDef(Reg);
169  continue;
170  }
// INSERT_SUBREG: continue from the register read as operand 2.
171  } else if (DefMI->isInsertSubreg()) {
172  Reg = DefMI->getOperand(2).getReg();
174  DefMI = MRI->getVRegDef(Reg);
175  continue;
176  }
177  }
178 
179  break;
180  }
181 
// Hazard only if the chain led back to the instruction we started from.
182  return DefMI == MI;
183 }
184 
185 bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
186  // FIXME: Detect integer instructions properly.
187  const MCInstrDesc &MCID = MI->getDesc();
188  unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
189  if (MI->mayStore())
190  return false;
191  unsigned Opcode = MCID.getOpcode();
192  if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
193  return false;
194  if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
195  return MI->readsRegister(Reg, TRI);
196  return false;
197 }
198 
199 static bool isFpMulInstruction(unsigned Opcode) {
200  switch (Opcode) {
201  case ARM::VMULS:
202  case ARM::VMULfd:
203  case ARM::VMULfq:
204  case ARM::VMULD:
205  case ARM::VMULslfd:
206  case ARM::VMULslfq:
207  return true;
208  default:
209  return false;
210  }
211 }
212 
213 bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
214  if (NumExpand >= ExpandLimit)
215  return false;
216 
217  if (ForceExapnd)
218  return true;
219 
220  MachineInstr *DefMI = getAccDefMI(MI);
221  if (TII->isFpMLxInstruction(DefMI->getOpcode())) {
222  // r0 = vmla
223  // r3 = vmla r0, r1, r2
224  // takes 16 - 17 cycles
225  //
226  // r0 = vmla
227  // r4 = vmul r1, r2
228  // r3 = vadd r0, r4
229  // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
230  IgnoreStall.insert(DefMI);
231  return true;
232  }
233 
234  // On Swift, we mostly care about hazards from multiplication instructions
235  // writing the accumulator and the pipelining of loop iterations by out-of-
236  // order execution.
237  if (isSwift)
238  return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI);
239 
240  if (IgnoreStall.count(MI))
241  return false;
242 
243  // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the
244  // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
245  // preserves the in-order retirement of the instructions.
246  // Look at the next few instructions, if *most* of them can cause hazards,
247  // then the scheduler can't *fix* this, we'd better break up the VMLA.
248  unsigned Limit1 = isLikeA9 ? 1 : 4;
249  unsigned Limit2 = isLikeA9 ? 1 : 4;
250  for (unsigned i = 1; i <= 4; ++i) {
251  int Idx = ((int)MIIdx - i + 4) % 4;
252  MachineInstr *NextMI = LastMIs[Idx];
253  if (!NextMI)
254  continue;
255 
256  if (TII->canCauseFpMLxStall(NextMI->getOpcode())) {
257  if (i <= Limit1)
258  return true;
259  }
260 
261  // Look for VMLx RAW hazard.
262  if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI))
263  return true;
264  }
265 
266  return false;
267 }
268 
269 /// ExpandFPMLxInstruction - Expand an MLA / MLS instruction into a pair
270 /// of MUL + ADD / SUB instructions.
///
/// The multiply (MulOpc) writes a freshly created virtual register; the
/// add / sub (AddSubOpc) then combines that register with the accumulator and
/// writes MI's original destination. Both new instructions are inserted
/// before MI and reuse its debug location and predicate operands. MI is then
/// erased and NumExpand is incremented.
271 void
272 MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
273  unsigned MulOpc, unsigned AddSubOpc,
274  bool NegAcc, bool HasLane) {
// Operand layout read from MI: 0 = destination, 1 = accumulator, 2 and 3 =
// multiply sources, 4 = lane immediate (only when HasLane), followed by the
// predicate condition immediate and the predicate register.
275  unsigned DstReg = MI->getOperand(0).getReg();
276  bool DstDead = MI->getOperand(0).isDead();
277  unsigned AccReg = MI->getOperand(1).getReg();
278  unsigned Src1Reg = MI->getOperand(2).getReg();
279  unsigned Src2Reg = MI->getOperand(3).getReg();
280  bool Src1Kill = MI->getOperand(2).isKill();
281  bool Src2Kill = MI->getOperand(3).isKill();
282  unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0;
283  unsigned NextOp = HasLane ? 5 : 4;
284  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
285  unsigned PredReg = MI->getOperand(++NextOp).getReg();
286 
287  const MCInstrDesc &MCID1 = TII->get(MulOpc);
288  const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
289  const MachineFunction &MF = *MI->getParent()->getParent();
// Temporary for the product, using the register class of the multiply's
// operand 0.
290  unsigned TmpReg = MRI->createVirtualRegister(
291  TII->getRegClass(MCID1, 0, TRI, MF));
292 
// TmpReg = MulOpc Src1, Src2 [, lane], pred
293  MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
294  .addReg(Src1Reg, getKillRegState(Src1Kill))
295  .addReg(Src2Reg, getKillRegState(Src2Kill));
296  if (HasLane)
297  MIB.addImm(LaneImm);
298  MIB.addImm(Pred).addReg(PredReg);
299 
// DstReg = AddSubOpc ..., pred. The destination keeps MI's dead flag.
300  MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2)
301  .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
302 
// With NegAcc the product is the first source and the accumulator the second
// (marked killed only if this was its single non-debug use); otherwise the
// accumulator comes first and carries no kill flag.
303  if (NegAcc) {
304  bool AccKill = MRI->hasOneNonDBGUse(AccReg);
305  MIB.addReg(TmpReg, getKillRegState(true))
306  .addReg(AccReg, getKillRegState(AccKill));
307  } else {
308  MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true));
309  }
310  MIB.addImm(Pred).addReg(PredReg);
311 
// Debug output: prints MI, then the two instructions just inserted.
// NOTE(review): listing line 315, which declares the iterator MII used
// below, is absent from this extract.
312  LLVM_DEBUG({
313  dbgs() << "Expanding: " << *MI;
314  dbgs() << " to:\n";
316  MII = std::prev(MII);
317  MachineInstr &MI2 = *MII;
318  MII = std::prev(MII);
319  MachineInstr &MI1 = *MII;
320  dbgs() << " " << MI1;
321  dbgs() << " " << MI2;
322  });
323 
324  MI->eraseFromParent();
325  ++NumExpand;
326 }
327 
328 bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
329  bool Changed = false;
330 
331  clearStack();
332  IgnoreStall.clear();
333 
334  unsigned Skip = 0;
335  MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
336  while (MII != E) {
337  MachineInstr *MI = &*MII++;
338 
339  if (MI->isPosition() || MI->isImplicitDef() || MI->isCopy())
340  continue;
341 
342  const MCInstrDesc &MCID = MI->getDesc();
343  if (MI->isBarrier()) {
344  clearStack();
345  Skip = 0;
346  continue;
347  }
348 
349  unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
350  if (Domain == ARMII::DomainGeneral) {
351  if (++Skip == 2)
352  // Assume dual issues of non-VFP / NEON instructions.
353  pushStack(nullptr);
354  } else {
355  Skip = 0;
356 
357  unsigned MulOpc, AddSubOpc;
358  bool NegAcc, HasLane;
359  if (!TII->isFpMLxInstruction(MCID.getOpcode(),
360  MulOpc, AddSubOpc, NegAcc, HasLane) ||
361  !FindMLxHazard(MI))
362  pushStack(MI);
363  else {
364  ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
365  Changed = true;
366  }
367  }
368  }
369 
370  return Changed;
371 }
372 
// Pass entry point. Returns false without touching anything when
// skipFunction() says so or when the subtarget's expandMLx() is false;
// otherwise runs ExpandFPMLxInstructions over every basic block and returns
// whether any block was changed.
373 bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
374  if (skipFunction(Fn.getFunction()))
375  return false;
376 
// Cache per-function state used by the helpers.
// NOTE(review): listing line 378 is absent from this extract; TRI is not
// assigned anywhere in the visible text, so it is presumably set there -
// confirm against the full file.
377  TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
379  MRI = &Fn.getRegInfo();
380  const ARMSubtarget *STI = &Fn.getSubtarget<ARMSubtarget>();
381  if (!STI->expandMLx())
382  return false;
// Swift is treated as A9-like here in addition to having its own flag.
383  isLikeA9 = STI->isLikeA9() || STI->isSwift();
384  isSwift = STI->isSwift();
385 
386  bool Modified = false;
387  for (MachineBasicBlock &MBB : Fn)
388  Modified |= ExpandFPMLxInstructions(MBB);
389 
390  return Modified;
391 }
392 
// Factory body: returns a newly allocated MLxExpansion pass.
// NOTE(review): the function header (listing line 393) is absent from this
// extract; the page's symbol index lists it as
// `FunctionPass * createMLxExpansionPass()`.
394  return new MLxExpansion();
395 }
FunctionPass * createMLxExpansionPass()
MachineBasicBlock * getMBB() const
This class represents lattice values for constants.
Definition: AllocatorList.h:24
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:164
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned Reg
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
bool isCopyLike() const
Return true if the instruction behaves like a copy.
bool isPHI() const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:412
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
static cl::opt< unsigned > ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden)
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:406
static cl::opt< bool > ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden)
virtual const TargetInstrInfo * getInstrInfo() const
reverse_iterator rend()
reverse_iterator rbegin()
unsigned getKillRegState(bool B)
unsigned getDeadRegState(bool B)
unsigned getDefRegState(bool B)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:820
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineInstrBuilder & UseMI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
bool isCopy() const
bool isImplicitDef() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, const TargetRegisterInfo &TRI)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
bool isInsertSubreg() const
MachineInstrBuilder MachineInstrBuilder & DefMI
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
bool expandMLx() const
Definition: ARMSubtarget.h:610
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:64
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool isSwift() const
Definition: ARMSubtarget.h:558
bool isPosition() const
Definition: MachineInstr.h:995
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:204
IRTranslator LLVM IR MI
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediate...
Definition: MachineInstr.h:640
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
#define LLVM_DEBUG(X)
Definition: Debug.h:123
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
static bool isFpMulInstruction(unsigned Opcode)
bool isLikeA9() const
Definition: ARMSubtarget.h:560