LLVM  8.0.1
A15SDOptimizer.cpp
Go to the documentation of this file.
1 //=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // The Cortex-A15 processor employs a tracking scheme in its register renaming
11 // in order to process each instruction's micro-ops speculatively and
12 // out-of-order with appropriate forwarding. The ARM architecture allows VFP
13 // instructions to read and write 32-bit S-registers. Each S-register
14 // corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
15 //
16 // There are several instruction patterns which can be used to provide this
17 // capability which can provide higher performance than other, potentially more
18 // direct patterns, specifically around when one micro-op reads a D-register
19 // operand that has recently been written as one or more S-register results.
20 //
21 // This file defines a pre-regalloc pass which looks for SPR producers which
22 // are going to be used by DPR (or QPR) consumers and creates the more
23 // optimized access pattern.
24 //
25 //===----------------------------------------------------------------------===//
26 
27 #include "ARM.h"
28 #include "ARMBaseInstrInfo.h"
29 #include "ARMBaseRegisterInfo.h"
30 #include "ARMSubtarget.h"
31 #include "llvm/ADT/Statistic.h"
39 #include "llvm/Support/Debug.h"
41 #include <map>
42 #include <set>
43 
44 using namespace llvm;
45 
46 #define DEBUG_TYPE "a15-sd-optimizer"
47 
48 namespace {
49  struct A15SDOptimizer : public MachineFunctionPass {
50  static char ID;
51  A15SDOptimizer() : MachineFunctionPass(ID) {}
52 
53  bool runOnMachineFunction(MachineFunction &Fn) override;
54 
55  StringRef getPassName() const override { return "ARM A15 S->D optimizer"; }
56 
57  private:
58  const ARMBaseInstrInfo *TII;
59  const TargetRegisterInfo *TRI;
61 
62  bool runOnInstruction(MachineInstr *MI);
63 
64  //
65  // Instruction builder helpers
66  //
67  unsigned createDupLane(MachineBasicBlock &MBB,
68  MachineBasicBlock::iterator InsertBefore,
69  const DebugLoc &DL, unsigned Reg, unsigned Lane,
70  bool QPR = false);
71 
72  unsigned createExtractSubreg(MachineBasicBlock &MBB,
73  MachineBasicBlock::iterator InsertBefore,
74  const DebugLoc &DL, unsigned DReg,
75  unsigned Lane, const TargetRegisterClass *TRC);
76 
77  unsigned createVExt(MachineBasicBlock &MBB,
78  MachineBasicBlock::iterator InsertBefore,
79  const DebugLoc &DL, unsigned Ssub0, unsigned Ssub1);
80 
81  unsigned createRegSequence(MachineBasicBlock &MBB,
82  MachineBasicBlock::iterator InsertBefore,
83  const DebugLoc &DL, unsigned Reg1,
84  unsigned Reg2);
85 
86  unsigned createInsertSubreg(MachineBasicBlock &MBB,
87  MachineBasicBlock::iterator InsertBefore,
88  const DebugLoc &DL, unsigned DReg,
89  unsigned Lane, unsigned ToInsert);
90 
91  unsigned createImplicitDef(MachineBasicBlock &MBB,
92  MachineBasicBlock::iterator InsertBefore,
93  const DebugLoc &DL);
94 
95  //
96  // Various property checkers
97  //
98  bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC);
99  bool hasPartialWrite(MachineInstr *MI);
100  SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI);
101  unsigned getDPRLaneFromSPR(unsigned SReg);
102 
103  //
104  // Methods used for getting the definitions of partial registers
105  //
106 
107  MachineInstr *elideCopies(MachineInstr *MI);
108  void elideCopiesAndPHIs(MachineInstr *MI,
110 
111  //
112  // Pattern optimization methods
113  //
114  unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg);
115  unsigned optimizeSDPattern(MachineInstr *MI);
116  unsigned getPrefSPRLane(unsigned SReg);
117 
118  //
119  // Sanitizing method - used to make sure if don't leave dead code around.
120  //
121  void eraseInstrWithNoUses(MachineInstr *MI);
122 
123  //
124  // A map used to track the changes done by this pass.
125  //
126  std::map<MachineInstr*, unsigned> Replacements;
127  std::set<MachineInstr *> DeadInstr;
128  };
129  char A15SDOptimizer::ID = 0;
130 } // end anonymous namespace
131 
132 // Returns true if this is a use of a SPR register.
133 bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
134  const TargetRegisterClass *TRC) {
135  if (!MO.isReg())
136  return false;
137  unsigned Reg = MO.getReg();
138 
140  return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
141  else
142  return TRC->contains(Reg);
143 }
144 
145 unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
146  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1,
147  &ARM::DPRRegClass);
148  if (DReg != ARM::NoRegister) return ARM::ssub_1;
149  return ARM::ssub_0;
150 }
151 
152 // Get the subreg type that is most likely to be coalesced
153 // for an SPR register that will be used in VDUP32d pseudo.
154 unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
155  if (!TRI->isVirtualRegister(SReg))
156  return getDPRLaneFromSPR(SReg);
157 
158  MachineInstr *MI = MRI->getVRegDef(SReg);
159  if (!MI) return ARM::ssub_0;
160  MachineOperand *MO = MI->findRegisterDefOperand(SReg);
161 
162  assert(MO->isReg() && "Non-register operand found!");
163  if (!MO) return ARM::ssub_0;
164 
165  if (MI->isCopy() && usesRegClass(MI->getOperand(1),
166  &ARM::SPRRegClass)) {
167  SReg = MI->getOperand(1).getReg();
168  }
169 
171  if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
172  return ARM::ssub_0;
173  }
174  return getDPRLaneFromSPR(SReg);
175 }
176 
177 // MI is known to be dead. Figure out what instructions
178 // are also made dead by this and mark them for removal.
179 void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
181  DeadInstr.insert(MI);
182 
183  LLVM_DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n");
184  Front.push_back(MI);
185 
186  while (Front.size() != 0) {
187  MI = Front.back();
188  Front.pop_back();
189 
190  // MI is already known to be dead. We need to see
191  // if other instructions can also be removed.
192  for (MachineOperand &MO : MI->operands()) {
193  if ((!MO.isReg()) || (!MO.isUse()))
194  continue;
195  unsigned Reg = MO.getReg();
196  if (!TRI->isVirtualRegister(Reg))
197  continue;
199 
200  if (!Op)
201  continue;
202 
203  MachineInstr *Def = Op->getParent();
204 
205  // We don't need to do anything if we have already marked
206  // this instruction as being dead.
207  if (DeadInstr.find(Def) != DeadInstr.end())
208  continue;
209 
210  // Check if all the uses of this instruction are marked as
211  // dead. If so, we can also mark this instruction as being
212  // dead.
213  bool IsDead = true;
214  for (MachineOperand &MODef : Def->operands()) {
215  if ((!MODef.isReg()) || (!MODef.isDef()))
216  continue;
217  unsigned DefReg = MODef.getReg();
218  if (!TRI->isVirtualRegister(DefReg)) {
219  IsDead = false;
220  break;
221  }
222  for (MachineInstr &Use : MRI->use_instructions(Reg)) {
223  // We don't care about self references.
224  if (&Use == Def)
225  continue;
226  if (DeadInstr.find(&Use) == DeadInstr.end()) {
227  IsDead = false;
228  break;
229  }
230  }
231  }
232 
233  if (!IsDead) continue;
234 
235  LLVM_DEBUG(dbgs() << "Deleting instruction " << *Def << "\n");
236  DeadInstr.insert(Def);
237  }
238  }
239 }
240 
241 // Creates the more optimized patterns and generally does all the code
242 // transformations in this pass.
243 unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
244  if (MI->isCopy()) {
245  return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg());
246  }
247 
248  if (MI->isInsertSubreg()) {
249  unsigned DPRReg = MI->getOperand(1).getReg();
250  unsigned SPRReg = MI->getOperand(2).getReg();
251 
252  if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) {
253  MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
254  MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
255 
256  if (DPRMI && SPRMI) {
257  // See if the first operand of this insert_subreg is IMPLICIT_DEF
258  MachineInstr *ECDef = elideCopies(DPRMI);
259  if (ECDef && ECDef->isImplicitDef()) {
260  // Another corner case - if we're inserting something that is purely
261  // a subreg copy of a DPR, just use that DPR.
262 
263  MachineInstr *EC = elideCopies(SPRMI);
264  // Is it a subreg copy of ssub_0?
265  if (EC && EC->isCopy() &&
266  EC->getOperand(1).getSubReg() == ARM::ssub_0) {
267  LLVM_DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI);
268 
269  // Find the thing we're subreg copying out of - is it of the same
270  // regclass as DPRMI? (i.e. a DPR or QPR).
271  unsigned FullReg = SPRMI->getOperand(1).getReg();
272  const TargetRegisterClass *TRC =
273  MRI->getRegClass(MI->getOperand(1).getReg());
274  if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
275  LLVM_DEBUG(dbgs() << "Subreg copy is compatible - returning ");
276  LLVM_DEBUG(dbgs() << printReg(FullReg) << "\n");
277  eraseInstrWithNoUses(MI);
278  return FullReg;
279  }
280  }
281 
282  return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg());
283  }
284  }
285  }
286  return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
287  }
288 
289  if (MI->isRegSequence() && usesRegClass(MI->getOperand(1),
290  &ARM::SPRRegClass)) {
291  // See if all bar one of the operands are IMPLICIT_DEF and insert the
292  // optimizer pattern accordingly.
293  unsigned NumImplicit = 0, NumTotal = 0;
294  unsigned NonImplicitReg = ~0U;
295 
296  for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) {
297  if (!MI->getOperand(I).isReg())
298  continue;
299  ++NumTotal;
300  unsigned OpReg = MI->getOperand(I).getReg();
301 
302  if (!TRI->isVirtualRegister(OpReg))
303  break;
304 
305  MachineInstr *Def = MRI->getVRegDef(OpReg);
306  if (!Def)
307  break;
308  if (Def->isImplicitDef())
309  ++NumImplicit;
310  else
311  NonImplicitReg = MI->getOperand(I).getReg();
312  }
313 
314  if (NumImplicit == NumTotal - 1)
315  return optimizeAllLanesPattern(MI, NonImplicitReg);
316  else
317  return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
318  }
319 
320  llvm_unreachable("Unhandled update pattern!");
321 }
322 
323 // Return true if this MachineInstr inserts a scalar (SPR) value into
324 // a D or Q register.
325 bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
326  // The only way we can do a partial register update is through a COPY,
327  // INSERT_SUBREG or REG_SEQUENCE.
328  if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
329  return true;
330 
331  if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2),
332  &ARM::SPRRegClass))
333  return true;
334 
335  if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
336  return true;
337 
338  return false;
339 }
340 
341 // Looks through full copies to get the instruction that defines the input
342 // operand for MI.
343 MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
344  if (!MI->isFullCopy())
345  return MI;
346  if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
347  return nullptr;
348  MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
349  if (!Def)
350  return nullptr;
351  return elideCopies(Def);
352 }
353 
354 // Look through full copies and PHIs to get the set of non-copy MachineInstrs
355 // that can produce MI.
356 void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
358  // Looking through PHIs may create loops so we need to track what
359  // instructions we have visited before.
360  std::set<MachineInstr *> Reached;
362  Front.push_back(MI);
363  while (Front.size() != 0) {
364  MI = Front.back();
365  Front.pop_back();
366 
367  // If we have already explored this MachineInstr, ignore it.
368  if (Reached.find(MI) != Reached.end())
369  continue;
370  Reached.insert(MI);
371  if (MI->isPHI()) {
372  for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
373  unsigned Reg = MI->getOperand(I).getReg();
374  if (!TRI->isVirtualRegister(Reg)) {
375  continue;
376  }
377  MachineInstr *NewMI = MRI->getVRegDef(Reg);
378  if (!NewMI)
379  continue;
380  Front.push_back(NewMI);
381  }
382  } else if (MI->isFullCopy()) {
383  if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
384  continue;
385  MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg());
386  if (!NewMI)
387  continue;
388  Front.push_back(NewMI);
389  } else {
390  LLVM_DEBUG(dbgs() << "Found partial copy" << *MI << "\n");
391  Outs.push_back(MI);
392  }
393  }
394 }
395 
396 // Return the DPR virtual registers that are read by this machine instruction
397 // (if any).
398 SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
399  if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() ||
400  MI->isKill())
401  return SmallVector<unsigned, 8>();
402 
404  for (MachineOperand &MO : MI->operands()) {
405  if (!MO.isReg() || !MO.isUse())
406  continue;
407  if (!usesRegClass(MO, &ARM::DPRRegClass) &&
408  !usesRegClass(MO, &ARM::QPRRegClass) &&
409  !usesRegClass(MO, &ARM::DPairRegClass)) // Treat DPair as QPR
410  continue;
411 
412  Defs.push_back(MO.getReg());
413  }
414  return Defs;
415 }
416 
417 // Creates a DPR register from an SPR one by using a VDUP.
418 unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
419  MachineBasicBlock::iterator InsertBefore,
420  const DebugLoc &DL, unsigned Reg,
421  unsigned Lane, bool QPR) {
422  unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass :
423  &ARM::DPRRegClass);
424  BuildMI(MBB, InsertBefore, DL,
425  TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), Out)
426  .addReg(Reg)
427  .addImm(Lane)
428  .add(predOps(ARMCC::AL));
429 
430  return Out;
431 }
432 
433 // Creates a SPR register from a DPR by copying the value in lane 0.
434 unsigned A15SDOptimizer::createExtractSubreg(
436  const DebugLoc &DL, unsigned DReg, unsigned Lane,
437  const TargetRegisterClass *TRC) {
438  unsigned Out = MRI->createVirtualRegister(TRC);
439  BuildMI(MBB,
440  InsertBefore,
441  DL,
442  TII->get(TargetOpcode::COPY), Out)
443  .addReg(DReg, 0, Lane);
444 
445  return Out;
446 }
447 
448 // Takes two SPR registers and creates a DPR by using a REG_SEQUENCE.
449 unsigned A15SDOptimizer::createRegSequence(
451  const DebugLoc &DL, unsigned Reg1, unsigned Reg2) {
452  unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
453  BuildMI(MBB,
454  InsertBefore,
455  DL,
456  TII->get(TargetOpcode::REG_SEQUENCE), Out)
457  .addReg(Reg1)
458  .addImm(ARM::dsub_0)
459  .addReg(Reg2)
460  .addImm(ARM::dsub_1);
461  return Out;
462 }
463 
464 // Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1)
465 // and merges them into one DPR register.
466 unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
467  MachineBasicBlock::iterator InsertBefore,
468  const DebugLoc &DL, unsigned Ssub0,
469  unsigned Ssub1) {
470  unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
471  BuildMI(MBB, InsertBefore, DL, TII->get(ARM::VEXTd32), Out)
472  .addReg(Ssub0)
473  .addReg(Ssub1)
474  .addImm(1)
475  .add(predOps(ARMCC::AL));
476  return Out;
477 }
478 
479 unsigned A15SDOptimizer::createInsertSubreg(
481  const DebugLoc &DL, unsigned DReg, unsigned Lane, unsigned ToInsert) {
482  unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
483  BuildMI(MBB,
484  InsertBefore,
485  DL,
486  TII->get(TargetOpcode::INSERT_SUBREG), Out)
487  .addReg(DReg)
488  .addReg(ToInsert)
489  .addImm(Lane);
490 
491  return Out;
492 }
493 
494 unsigned
495 A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
496  MachineBasicBlock::iterator InsertBefore,
497  const DebugLoc &DL) {
498  unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
499  BuildMI(MBB,
500  InsertBefore,
501  DL,
502  TII->get(TargetOpcode::IMPLICIT_DEF), Out);
503  return Out;
504 }
505 
506 // This function inserts instructions in order to optimize interactions between
507 // SPR registers and DPR/QPR registers. It does so by performing VDUPs on all
508 // lanes, and the using VEXT instructions to recompose the result.
509 unsigned
510 A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
511  MachineBasicBlock::iterator InsertPt(MI);
512  DebugLoc DL = MI->getDebugLoc();
513  MachineBasicBlock &MBB = *MI->getParent();
514  InsertPt++;
515  unsigned Out;
516 
517  // DPair has the same length as QPR and also has two DPRs as subreg.
518  // Treat DPair as QPR.
519  if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass) ||
520  MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPairRegClass)) {
521  unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
522  ARM::dsub_0, &ARM::DPRRegClass);
523  unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
524  ARM::dsub_1, &ARM::DPRRegClass);
525 
526  unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0);
527  unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1);
528  Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
529 
530  unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0);
531  unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1);
532  Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4);
533 
534  Out = createRegSequence(MBB, InsertPt, DL, Out, Out2);
535 
536  } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) {
537  unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0);
538  unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1);
539  Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
540 
541  } else {
542  assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
543  "Found unexpected regclass!");
544 
545  unsigned PrefLane = getPrefSPRLane(Reg);
546  unsigned Lane;
547  switch (PrefLane) {
548  case ARM::ssub_0: Lane = 0; break;
549  case ARM::ssub_1: Lane = 1; break;
550  default: llvm_unreachable("Unknown preferred lane!");
551  }
552 
553  // Treat DPair as QPR
554  bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass) ||
555  usesRegClass(MI->getOperand(0), &ARM::DPairRegClass);
556 
557  Out = createImplicitDef(MBB, InsertPt, DL);
558  Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
559  Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR);
560  eraseInstrWithNoUses(MI);
561  }
562  return Out;
563 }
564 
565 bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
566  // We look for instructions that write S registers that are then read as
567  // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
568  // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
569  // merge two SPR values to form a DPR register. In order avoid false
570  // positives we make sure that there is an SPR producer so we look past
571  // COPY and PHI nodes to find it.
572  //
573  // The best code pattern for when an SPR producer is going to be used by a
574  // DPR or QPR consumer depends on whether the other lanes of the
575  // corresponding DPR/QPR are currently defined.
576  //
577  // We can handle these efficiently, depending on the type of
578  // pseudo-instruction that is producing the pattern
579  //
580  // * COPY: * VDUP all lanes and merge the results together
581  // using VEXTs.
582  //
583  // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
584  // lane, and the other lane(s) of the DPR/QPR register
585  // that we are inserting in are undefined, use the
586  // original DPR/QPR value.
587  // * Otherwise, fall back on the same stategy as COPY.
588  //
589  // * REG_SEQUENCE: * If all except one of the input operands are
590  // IMPLICIT_DEFs, insert the VDUP pattern for just the
591  // defined input operand
592  // * Otherwise, fall back on the same stategy as COPY.
593  //
594 
595  // First, get all the reads of D-registers done by this instruction.
596  SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
597  bool Modified = false;
598 
599  for (SmallVectorImpl<unsigned>::iterator I = Defs.begin(), E = Defs.end();
600  I != E; ++I) {
601  // Follow the def-use chain for this DPR through COPYs, and also through
602  // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
603  // we can end up with multiple defs of this DPR.
604 
606  if (!TRI->isVirtualRegister(*I))
607  continue;
608  MachineInstr *Def = MRI->getVRegDef(*I);
609  if (!Def)
610  continue;
611 
612  elideCopiesAndPHIs(Def, DefSrcs);
613 
614  for (MachineInstr *MI : DefSrcs) {
615  // If we've already analyzed and replaced this operand, don't do
616  // anything.
617  if (Replacements.find(MI) != Replacements.end())
618  continue;
619 
620  // Now, work out if the instruction causes a SPR->DPR dependency.
621  if (!hasPartialWrite(MI))
622  continue;
623 
624  // Collect all the uses of this MI's DPR def for updating later.
626  unsigned DPRDefReg = MI->getOperand(0).getReg();
627  for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg),
628  E = MRI->use_end(); I != E; ++I)
629  Uses.push_back(&*I);
630 
631  // We can optimize this.
632  unsigned NewReg = optimizeSDPattern(MI);
633 
634  if (NewReg != 0) {
635  Modified = true;
637  E = Uses.end(); I != E; ++I) {
638  // Make sure to constrain the register class of the new register to
639  // match what we're replacing. Otherwise we can optimize a DPR_VFP2
640  // reference into a plain DPR, and that will end poorly. NewReg is
641  // always virtual here, so there will always be a matching subclass
642  // to find.
643  MRI->constrainRegClass(NewReg, MRI->getRegClass((*I)->getReg()));
644 
645  LLVM_DEBUG(dbgs() << "Replacing operand " << **I << " with "
646  << printReg(NewReg) << "\n");
647  (*I)->substVirtReg(NewReg, 0, *TRI);
648  }
649  }
650  Replacements[MI] = NewReg;
651  }
652  }
653  return Modified;
654 }
655 
656 bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
657  if (skipFunction(Fn.getFunction()))
658  return false;
659 
660  const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
661  // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
662  // enabled when NEON is available.
663  if (!(STI.useSplatVFPToNeon() && STI.hasNEON()))
664  return false;
665 
666  TII = STI.getInstrInfo();
667  TRI = STI.getRegisterInfo();
668  MRI = &Fn.getRegInfo();
669  bool Modified = false;
670 
671  LLVM_DEBUG(dbgs() << "Running on function " << Fn.getName() << "\n");
672 
673  DeadInstr.clear();
674  Replacements.clear();
675 
676  for (MachineBasicBlock &MBB : Fn) {
677  for (MachineInstr &MI : MBB) {
678  Modified |= runOnInstruction(&MI);
679  }
680  }
681 
682  for (MachineInstr *MI : DeadInstr) {
683  MI->eraseFromParent();
684  }
685 
686  return Modified;
687 }
688 
690  return new A15SDOptimizer();
691 }
const MachineInstrBuilder & add(const MachineOperand &MO) const
FunctionPass * createA15SDOptimizerPass()
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool contains(unsigned Reg) const
Return true if the specified register is included in this register class.
bool IsDead
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:328
This class represents lattice values for constants.
Definition: AllocatorList.h:24
MachineOperand * findRegisterDefOperand(unsigned Reg, bool isDead=false, const TargetRegisterInfo *TRI=nullptr)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned Reg
unsigned getSubReg() const
bool isRegSequence() const
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:459
bool isCopyLike() const
Return true if the instruction behaves like a copy.
bool isPHI() const
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:491
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:412
Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isFullCopy() const
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned const MachineRegisterInfo * MRI
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
bool isCopy() const
bool isImplicitDef() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
size_t size() const
Definition: SmallVector.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
MachineOperand class - Representation of each machine instruction operand.
bool isInsertSubreg() const
bool hasNEON() const
Definition: ARMSubtarget.h:571
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:478
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:64
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
#define I(x, y, z)
Definition: MD5.cpp:58
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:503
bool useSplatVFPToNeon() const
Definition: ARMSubtarget.h:617
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool isKill() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
#define LLVM_DEBUG(X)
Definition: Debug.h:123
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...