LLVM  8.0.1
PPCMIPeephole.cpp
Go to the documentation of this file.
1 //===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===---------------------------------------------------------------------===//
9 //
10 // This pass performs peephole optimizations to clean up ugly code
11 // sequences at the MachineInstruction layer. It runs at the end of
12 // the SSA phases, following VSX swap removal. A pass of dead code
13 // elimination follows this one for quick clean-up of any dead
14 // instructions introduced here. Although we could do this as callbacks
15 // from the generic peephole pass, this would have a couple of bad
16 // effects: it might remove optimization opportunities for VSX swap
17 // removal, and it would miss cleanups made possible following VSX
18 // swap removal.
19 //
20 //===---------------------------------------------------------------------===//
21 
22 #include "PPC.h"
23 #include "PPCInstrBuilder.h"
24 #include "PPCInstrInfo.h"
25 #include "PPCTargetMachine.h"
26 #include "llvm/ADT/Statistic.h"
31 #include "llvm/Support/Debug.h"
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "ppc-mi-peepholes"
37 
38 STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
39 STATISTIC(MultiTOCSaves,
40  "Number of functions with multiple TOC saves that must be kept");
41 STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
42 STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
43 STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
44 STATISTIC(NumConvertedToImmediateForm,
45  "Number of instructions converted to their immediate form");
46 STATISTIC(NumFunctionsEnteredInMIPeephole,
47  "Number of functions entered in PPC MI Peepholes");
48 STATISTIC(NumFixedPointIterations,
49  "Number of fixed-point iterations converting reg-reg instructions "
50  "to reg-imm ones");
51 
52 static cl::opt<bool>
53 FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
54  cl::desc("Iterate to a fixed point when attempting to "
55  "convert reg-reg instructions to reg-imm"));
56 
57 static cl::opt<bool>
58 ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
59  cl::desc("Convert eligible reg+reg instructions to reg+imm"));
60 
61 static cl::opt<bool>
62  EnableSExtElimination("ppc-eliminate-signext",
63  cl::desc("enable elimination of sign-extensions"),
64  cl::init(false), cl::Hidden);
65 
66 static cl::opt<bool>
67  EnableZExtElimination("ppc-eliminate-zeroext",
68  cl::desc("enable elimination of zero-extensions"),
69  cl::init(false), cl::Hidden);
70 
71 namespace {
72 
73 struct PPCMIPeephole : public MachineFunctionPass {
74 
75  static char ID;
76  const PPCInstrInfo *TII;
77  MachineFunction *MF;
79 
80  PPCMIPeephole() : MachineFunctionPass(ID) {
82  }
83 
84 private:
86 
87  // Initialize class variables.
88  void initialize(MachineFunction &MFParm);
89 
90  // Perform peepholes.
91  bool simplifyCode(void);
92 
93  // Perform peepholes.
94  bool eliminateRedundantCompare(void);
95  bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
96  void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
97  MachineInstr *MI);
98 
99 public:
100 
101  void getAnalysisUsage(AnalysisUsage &AU) const override {
105  }
106 
107  // Main entry point for this pass.
108  bool runOnMachineFunction(MachineFunction &MF) override {
109  if (skipFunction(MF.getFunction()))
110  return false;
111  initialize(MF);
112  return simplifyCode();
113  }
114 };
115 
116 // Initialize class variables.
118  MF = &MFParm;
119  MRI = &MF->getRegInfo();
120  MDT = &getAnalysis<MachineDominatorTree>();
121  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
122  LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
123  LLVM_DEBUG(MF->dump());
124 }
125 
126 static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
127  MachineRegisterInfo *MRI) {
128  assert(Op && "Invalid Operand!");
129  if (!Op->isReg())
130  return nullptr;
131 
132  unsigned Reg = Op->getReg();
134  return nullptr;
135 
136  return MRI->getVRegDef(Reg);
137 }
138 
139 // This function returns number of known zero bits in output of MI
140 // starting from the most significant bit.
141 static unsigned
142 getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
143  unsigned Opcode = MI->getOpcode();
144  if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
145  Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo)
146  return MI->getOperand(3).getImm();
147 
148  if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
149  MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
150  return MI->getOperand(3).getImm();
151 
152  if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo ||
153  Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo ||
154  Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
155  MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
156  return 32 + MI->getOperand(3).getImm();
157 
158  if (Opcode == PPC::ANDIo) {
159  uint16_t Imm = MI->getOperand(2).getImm();
160  return 48 + countLeadingZeros(Imm);
161  }
162 
163  if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo ||
164  Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo ||
165  Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
166  // The result ranges from 0 to 32.
167  return 58;
168 
169  if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo ||
170  Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo)
171  // The result ranges from 0 to 64.
172  return 57;
173 
174  if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
175  Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
176  Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
177  Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
178  return 48;
179 
180  if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
181  Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
182  Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
183  Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
184  return 56;
185 
186  if (TII->isZeroExtended(*MI))
187  return 32;
188 
189  return 0;
190 }
191 
192 // This function maintains a map for the pairs <TOC Save Instr, Keep>
193 // Each time a new TOC save is encountered, it checks if any of the existing
194 // ones are dominated by the new one. If so, it marks the existing one as
195 // redundant by setting it's entry in the map as false. It then adds the new
196 // instruction to the map with either true or false depending on if any
197 // existing instructions dominated the new one.
198 void PPCMIPeephole::UpdateTOCSaves(
199  std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
200  assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
201  bool Keep = true;
202  for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) {
203  MachineInstr *CurrInst = It->first;
204  // If new instruction dominates an existing one, mark existing one as
205  // redundant.
206  if (It->second && MDT->dominates(MI, CurrInst))
207  It->second = false;
208  // Check if the new instruction is redundant.
209  if (MDT->dominates(CurrInst, MI)) {
210  Keep = false;
211  break;
212  }
213  }
214  // Add new instruction to map.
215  TOCSaves[MI] = Keep;
216 }
217 
218 // Perform peephole optimizations.
219 bool PPCMIPeephole::simplifyCode(void) {
220  bool Simplified = false;
221  MachineInstr* ToErase = nullptr;
222  std::map<MachineInstr *, bool> TOCSaves;
223  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
224  NumFunctionsEnteredInMIPeephole++;
225  if (ConvertRegReg) {
226  // Fixed-point conversion of reg/reg instructions fed by load-immediate
227  // into reg/imm instructions. FIXME: This is expensive, control it with
228  // an option.
229  bool SomethingChanged = false;
230  do {
231  NumFixedPointIterations++;
232  SomethingChanged = false;
233  for (MachineBasicBlock &MBB : *MF) {
234  for (MachineInstr &MI : MBB) {
235  if (MI.isDebugInstr())
236  continue;
237 
238  if (TII->convertToImmediateForm(MI)) {
239  // We don't erase anything in case the def has other uses. Let DCE
240  // remove it if it can be removed.
241  LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
242  LLVM_DEBUG(MI.dump());
243  NumConvertedToImmediateForm++;
244  SomethingChanged = true;
245  Simplified = true;
246  continue;
247  }
248  }
249  }
250  } while (SomethingChanged && FixedPointRegToImm);
251  }
252 
253  for (MachineBasicBlock &MBB : *MF) {
254  for (MachineInstr &MI : MBB) {
255 
256  // If the previous instruction was marked for elimination,
257  // remove it now.
258  if (ToErase) {
259  ToErase->eraseFromParent();
260  ToErase = nullptr;
261  }
262 
263  // Ignore debug instructions.
264  if (MI.isDebugInstr())
265  continue;
266 
267  // Per-opcode peepholes.
268  switch (MI.getOpcode()) {
269 
270  default:
271  break;
272 
273  case PPC::STD: {
274  MachineFrameInfo &MFI = MF->getFrameInfo();
275  if (MFI.hasVarSizedObjects() ||
276  !MF->getSubtarget<PPCSubtarget>().isELFv2ABI())
277  break;
278  // When encountering a TOC save instruction, call UpdateTOCSaves
279  // to add it to the TOCSaves map and mark any existing TOC saves
280  // it dominates as redundant.
281  if (TII->isTOCSaveMI(MI))
282  UpdateTOCSaves(TOCSaves, &MI);
283  break;
284  }
285  case PPC::XXPERMDI: {
286  // Perform simplifications of 2x64 vector swaps and splats.
287  // A swap is identified by an immediate value of 2, and a splat
288  // is identified by an immediate value of 0 or 3.
289  int Immed = MI.getOperand(3).getImm();
290 
291  if (Immed != 1) {
292 
293  // For each of these simplifications, we need the two source
294  // regs to match. Unfortunately, MachineCSE ignores COPY and
295  // SUBREG_TO_REG, so for example we can see
296  // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
297  // We have to look through chains of COPY and SUBREG_TO_REG
298  // to find the real source values for comparison.
299  unsigned TrueReg1 =
300  TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
301  unsigned TrueReg2 =
302  TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
303 
304  if (TrueReg1 == TrueReg2
306  MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
307  unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;
308 
309  // If this is a splat fed by a splatting load, the splat is
310  // redundant. Replace with a copy. This doesn't happen directly due
311  // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
312  // a load of a double to a vector of 64-bit integers.
313  auto isConversionOfLoadAndSplat = [=]() -> bool {
314  if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
315  return false;
316  unsigned DefReg =
317  TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
319  MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
320  if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
321  return true;
322  }
323  return false;
324  };
325  if (DefMI && (Immed == 0 || Immed == 3)) {
326  if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
327  LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
328  "to load-and-splat/copy: ");
329  LLVM_DEBUG(MI.dump());
330  BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
331  MI.getOperand(0).getReg())
332  .add(MI.getOperand(1));
333  ToErase = &MI;
334  Simplified = true;
335  }
336  }
337 
338  // If this is a splat or a swap fed by another splat, we
339  // can replace it with a copy.
340  if (DefOpc == PPC::XXPERMDI) {
341  unsigned FeedImmed = DefMI->getOperand(3).getImm();
342  unsigned FeedReg1 =
343  TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
344  unsigned FeedReg2 =
345  TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
346 
347  if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
348  LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
349  "to splat/copy: ");
350  LLVM_DEBUG(MI.dump());
351  BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
352  MI.getOperand(0).getReg())
353  .add(MI.getOperand(1));
354  ToErase = &MI;
355  Simplified = true;
356  }
357 
358  // If this is a splat fed by a swap, we can simplify modify
359  // the splat to splat the other value from the swap's input
360  // parameter.
361  else if ((Immed == 0 || Immed == 3)
362  && FeedImmed == 2 && FeedReg1 == FeedReg2) {
363  LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
364  LLVM_DEBUG(MI.dump());
365  MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
366  MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
367  MI.getOperand(3).setImm(3 - Immed);
368  Simplified = true;
369  }
370 
371  // If this is a swap fed by a swap, we can replace it
372  // with a copy from the first swap's input.
373  else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
374  LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
375  LLVM_DEBUG(MI.dump());
376  BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
377  MI.getOperand(0).getReg())
378  .add(DefMI->getOperand(1));
379  ToErase = &MI;
380  Simplified = true;
381  }
382  } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
383  (DefMI->getOperand(2).getImm() == 0 ||
384  DefMI->getOperand(2).getImm() == 3)) {
385  // Splat fed by another splat - switch the output of the first
386  // and remove the second.
387  DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
388  ToErase = &MI;
389  Simplified = true;
390  LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
391  LLVM_DEBUG(MI.dump());
392  }
393  }
394  }
395  break;
396  }
397  case PPC::VSPLTB:
398  case PPC::VSPLTH:
399  case PPC::XXSPLTW: {
400  unsigned MyOpcode = MI.getOpcode();
401  unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
402  unsigned TrueReg =
403  TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
405  break;
406  MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
407  if (!DefMI)
408  break;
409  unsigned DefOpcode = DefMI->getOpcode();
410  auto isConvertOfSplat = [=]() -> bool {
411  if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
412  return false;
413  unsigned ConvReg = DefMI->getOperand(1).getReg();
415  return false;
416  MachineInstr *Splt = MRI->getVRegDef(ConvReg);
417  return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
418  Splt->getOpcode() == PPC::XXSPLTW);
419  };
420  bool AlreadySplat = (MyOpcode == DefOpcode) ||
421  (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
422  (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
423  (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
424  (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
425  (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
426  (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
427  // If the instruction[s] that feed this splat have already splat
428  // the value, this splat is redundant.
429  if (AlreadySplat) {
430  LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
431  LLVM_DEBUG(MI.dump());
432  BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
433  MI.getOperand(0).getReg())
434  .add(MI.getOperand(OpNo));
435  ToErase = &MI;
436  Simplified = true;
437  }
438  // Splat fed by a shift. Usually when we align value to splat into
439  // vector element zero.
440  if (DefOpcode == PPC::XXSLDWI) {
441  unsigned ShiftRes = DefMI->getOperand(0).getReg();
442  unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
443  unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
444  unsigned ShiftImm = DefMI->getOperand(3).getImm();
445  unsigned SplatImm = MI.getOperand(2).getImm();
446  if (ShiftOp1 == ShiftOp2) {
447  unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
448  if (MRI->hasOneNonDBGUse(ShiftRes)) {
449  LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
450  LLVM_DEBUG(DefMI->dump());
451  ToErase = DefMI;
452  }
453  Simplified = true;
454  LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
455  << " to " << NewElem << " in instruction: ");
456  LLVM_DEBUG(MI.dump());
457  MI.getOperand(1).setReg(ShiftOp1);
458  MI.getOperand(2).setImm(NewElem);
459  }
460  }
461  break;
462  }
463  case PPC::XVCVDPSP: {
464  // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
465  unsigned TrueReg =
466  TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
468  break;
469  MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
470 
471  // This can occur when building a vector of single precision or integer
472  // values.
473  if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
474  unsigned DefsReg1 =
475  TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
476  unsigned DefsReg2 =
477  TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
478  if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
480  break;
481  MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
482  MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
483 
484  if (!P1 || !P2)
485  break;
486 
487  // Remove the passed FRSP instruction if it only feeds this MI and
488  // set any uses of that FRSP (in this MI) to the source of the FRSP.
489  auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
490  if (RoundInstr->getOpcode() == PPC::FRSP &&
491  MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
492  Simplified = true;
493  unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
494  unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
495  MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
496  for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
497  if (Use.getOperand(i).isReg() &&
498  Use.getOperand(i).getReg() == FRSPDefines)
499  Use.getOperand(i).setReg(ConvReg1);
500  LLVM_DEBUG(dbgs() << "Removing redundant FRSP:\n");
501  LLVM_DEBUG(RoundInstr->dump());
502  LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
503  LLVM_DEBUG(MI.dump());
504  LLVM_DEBUG(dbgs() << "Through instruction:\n");
505  LLVM_DEBUG(DefMI->dump());
506  RoundInstr->eraseFromParent();
507  }
508  };
509 
510  // If the input to XVCVDPSP is a vector that was built (even
511  // partially) out of FRSP's, the FRSP(s) can safely be removed
512  // since this instruction performs the same operation.
513  if (P1 != P2) {
514  removeFRSPIfPossible(P1);
515  removeFRSPIfPossible(P2);
516  break;
517  }
518  removeFRSPIfPossible(P1);
519  }
520  break;
521  }
522  case PPC::EXTSH:
523  case PPC::EXTSH8:
524  case PPC::EXTSH8_32_64: {
525  if (!EnableSExtElimination) break;
526  unsigned NarrowReg = MI.getOperand(1).getReg();
528  break;
529 
530  MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
531  // If we've used a zero-extending load that we will sign-extend,
532  // just do a sign-extending load.
533  if (SrcMI->getOpcode() == PPC::LHZ ||
534  SrcMI->getOpcode() == PPC::LHZX) {
535  if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
536  break;
537  auto is64Bit = [] (unsigned Opcode) {
538  return Opcode == PPC::EXTSH8;
539  };
540  auto isXForm = [] (unsigned Opcode) {
541  return Opcode == PPC::LHZX;
542  };
543  auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
544  if (is64Bit)
545  if (isXForm) return PPC::LHAX8;
546  else return PPC::LHA8;
547  else
548  if (isXForm) return PPC::LHAX;
549  else return PPC::LHA;
550  };
551  unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
552  isXForm(SrcMI->getOpcode()));
553  LLVM_DEBUG(dbgs() << "Zero-extending load\n");
554  LLVM_DEBUG(SrcMI->dump());
555  LLVM_DEBUG(dbgs() << "and sign-extension\n");
556  LLVM_DEBUG(MI.dump());
557  LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
558  SrcMI->setDesc(TII->get(Opc));
559  SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
560  ToErase = &MI;
561  Simplified = true;
562  NumEliminatedSExt++;
563  }
564  break;
565  }
566  case PPC::EXTSW:
567  case PPC::EXTSW_32:
568  case PPC::EXTSW_32_64: {
569  if (!EnableSExtElimination) break;
570  unsigned NarrowReg = MI.getOperand(1).getReg();
572  break;
573 
574  MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
575  // If we've used a zero-extending load that we will sign-extend,
576  // just do a sign-extending load.
577  if (SrcMI->getOpcode() == PPC::LWZ ||
578  SrcMI->getOpcode() == PPC::LWZX) {
579  if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
580  break;
581  auto is64Bit = [] (unsigned Opcode) {
582  return Opcode == PPC::EXTSW || Opcode == PPC::EXTSW_32_64;
583  };
584  auto isXForm = [] (unsigned Opcode) {
585  return Opcode == PPC::LWZX;
586  };
587  auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
588  if (is64Bit)
589  if (isXForm) return PPC::LWAX;
590  else return PPC::LWA;
591  else
592  if (isXForm) return PPC::LWAX_32;
593  else return PPC::LWA_32;
594  };
595  unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
596  isXForm(SrcMI->getOpcode()));
597  LLVM_DEBUG(dbgs() << "Zero-extending load\n");
598  LLVM_DEBUG(SrcMI->dump());
599  LLVM_DEBUG(dbgs() << "and sign-extension\n");
600  LLVM_DEBUG(MI.dump());
601  LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
602  SrcMI->setDesc(TII->get(Opc));
603  SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
604  ToErase = &MI;
605  Simplified = true;
606  NumEliminatedSExt++;
607  } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
608  TII->isSignExtended(*SrcMI)) {
609  // We can eliminate EXTSW if the input is known to be already
610  // sign-extended.
611  LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
612  unsigned TmpReg =
613  MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
614  BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
615  TmpReg);
616  BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
617  MI.getOperand(0).getReg())
618  .addReg(TmpReg)
619  .addReg(NarrowReg)
620  .addImm(PPC::sub_32);
621  ToErase = &MI;
622  Simplified = true;
623  NumEliminatedSExt++;
624  }
625  break;
626  }
627  case PPC::RLDICL: {
628  // We can eliminate RLDICL (e.g. for zero-extension)
629  // if all bits to clear are already zero in the input.
630  // This code assume following code sequence for zero-extension.
631  // %6 = COPY %5:sub_32; (optional)
632  // %8 = IMPLICIT_DEF;
633  // %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
634  if (!EnableZExtElimination) break;
635 
636  if (MI.getOperand(2).getImm() != 0)
637  break;
638 
639  unsigned SrcReg = MI.getOperand(1).getReg();
641  break;
642 
643  MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
644  if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
645  SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
646  break;
647 
648  MachineInstr *ImpDefMI, *SubRegMI;
649  ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
650  SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
651  if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
652 
653  SrcMI = SubRegMI;
654  if (SubRegMI->getOpcode() == PPC::COPY) {
655  unsigned CopyReg = SubRegMI->getOperand(1).getReg();
657  SrcMI = MRI->getVRegDef(CopyReg);
658  }
659 
660  unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII);
661  if (MI.getOperand(3).getImm() <= KnownZeroCount) {
662  LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
663  BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
664  MI.getOperand(0).getReg())
665  .addReg(SrcReg);
666  ToErase = &MI;
667  Simplified = true;
668  NumEliminatedZExt++;
669  }
670  break;
671  }
672 
673  // TODO: Any instruction that has an immediate form fed only by a PHI
674  // whose operands are all load immediate can be folded away. We currently
675  // do this for ADD instructions, but should expand it to arithmetic and
676  // binary instructions with immediate forms in the future.
677  case PPC::ADD4:
678  case PPC::ADD8: {
679  auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
680  assert(PhiOp && "Invalid Operand!");
681  MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
682 
683  return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
684  MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
685  };
686 
687  auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
688  MachineOperand *PhiOp) {
689  assert(PhiOp && "Invalid Operand!");
690  assert(DominatorOp && "Invalid Operand!");
691  MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
692  MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
693 
694  // Note: the vregs only show up at odd indices position of PHI Node,
695  // the even indices position save the BB info.
696  for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
697  MachineInstr *LiMI =
698  getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
699  if (!LiMI ||
700  (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
701  || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
702  !MDT->dominates(DefDomMI, LiMI))
703  return false;
704  }
705 
706  return true;
707  };
708 
709  MachineOperand Op1 = MI.getOperand(1);
710  MachineOperand Op2 = MI.getOperand(2);
711  if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
712  std::swap(Op1, Op2);
713  else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
714  break; // We don't have an ADD fed by LI's that can be transformed
715 
716  // Now we know that Op1 is the PHI node and Op2 is the dominator
717  unsigned DominatorReg = Op2.getReg();
718 
719  const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
720  ? &PPC::G8RC_and_G8RC_NOX0RegClass
721  : &PPC::GPRC_and_GPRC_NOR0RegClass;
722  MRI->setRegClass(DominatorReg, TRC);
723 
724  // replace LIs with ADDIs
725  MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
726  for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
727  MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
728  LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
729  LLVM_DEBUG(LiMI->dump());
730 
731  // There could be repeated registers in the PHI, e.g: %1 =
732  // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
733  // already replaced the def instruction, skip.
734  if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
735  continue;
736 
737  assert((LiMI->getOpcode() == PPC::LI ||
738  LiMI->getOpcode() == PPC::LI8) &&
739  "Invalid Opcode!");
740  auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
741  LiMI->RemoveOperand(1); // remove the imm of LI
742  LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
743  : PPC::ADDI8));
744  MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
745  .addReg(DominatorReg)
746  .addImm(LiImm); // restore the imm of LI
747  LLVM_DEBUG(LiMI->dump());
748  }
749 
750  // Replace ADD with COPY
751  LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
752  LLVM_DEBUG(MI.dump());
753  BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
754  MI.getOperand(0).getReg())
755  .add(Op1);
756  ToErase = &MI;
757  Simplified = true;
758  NumOptADDLIs++;
759  break;
760  }
761  }
762  }
763 
764  // If the last instruction was marked for elimination,
765  // remove it now.
766  if (ToErase) {
767  ToErase->eraseFromParent();
768  ToErase = nullptr;
769  }
770  }
771 
772  // Eliminate all the TOC save instructions which are redundant.
773  Simplified |= eliminateRedundantTOCSaves(TOCSaves);
774  // We try to eliminate redundant compare instruction.
775  Simplified |= eliminateRedundantCompare();
776 
777  return Simplified;
778 }
779 
780 // helper functions for eliminateRedundantCompare
781 static bool isEqOrNe(MachineInstr *BI) {
783  unsigned PredCond = PPC::getPredicateCondition(Pred);
784  return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
785 }
786 
787 static bool isSupportedCmpOp(unsigned opCode) {
788  return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
789  opCode == PPC::CMPLW || opCode == PPC::CMPW ||
790  opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
791  opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
792 }
793 
794 static bool is64bitCmpOp(unsigned opCode) {
795  return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
796  opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
797 }
798 
799 static bool isSignedCmpOp(unsigned opCode) {
800  return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
801  opCode == PPC::CMPDI || opCode == PPC::CMPWI);
802 }
803 
804 static unsigned getSignedCmpOpCode(unsigned opCode) {
805  if (opCode == PPC::CMPLD) return PPC::CMPD;
806  if (opCode == PPC::CMPLW) return PPC::CMPW;
807  if (opCode == PPC::CMPLDI) return PPC::CMPDI;
808  if (opCode == PPC::CMPLWI) return PPC::CMPWI;
809  return opCode;
810 }
811 
812 // We can decrement immediate x in (GE x) by changing it to (GT x-1) or
813 // (LT x) to (LE x-1)
814 static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
815  uint64_t Imm = CMPI->getOperand(2).getImm();
816  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
817  if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
818  return 0;
819 
821  unsigned PredCond = PPC::getPredicateCondition(Pred);
822  unsigned PredHint = PPC::getPredicateHint(Pred);
823  if (PredCond == PPC::PRED_GE)
824  return PPC::getPredicate(PPC::PRED_GT, PredHint);
825  if (PredCond == PPC::PRED_LT)
826  return PPC::getPredicate(PPC::PRED_LE, PredHint);
827 
828  return 0;
829 }
830 
831 // We can increment immediate x in (GT x) by changing it to (GE x+1) or
832 // (LE x) to (LT x+1)
833 static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
834  uint64_t Imm = CMPI->getOperand(2).getImm();
835  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
836  if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
837  return 0;
838 
840  unsigned PredCond = PPC::getPredicateCondition(Pred);
841  unsigned PredHint = PPC::getPredicateHint(Pred);
842  if (PredCond == PPC::PRED_GT)
843  return PPC::getPredicate(PPC::PRED_GE, PredHint);
844  if (PredCond == PPC::PRED_LE)
845  return PPC::getPredicate(PPC::PRED_LT, PredHint);
846 
847  return 0;
848 }
849 
850 // This takes a Phi node and returns a register value for the specified BB.
851 static unsigned getIncomingRegForBlock(MachineInstr *Phi,
852  MachineBasicBlock *MBB) {
853  for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
854  MachineOperand &MO = Phi->getOperand(I);
855  if (MO.getMBB() == MBB)
856  return Phi->getOperand(I-1).getReg();
857  }
858  llvm_unreachable("invalid src basic block for this Phi node\n");
859  return 0;
860 }
861 
862 // This function tracks the source of the register through register copy.
863 // If BB1 and BB2 are non-NULL, we also track PHI instruction in BB2
864 // assuming that the control comes from BB1 into BB2.
865 static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
867  unsigned SrcReg = Reg;
868  while (1) {
869  unsigned NextReg = SrcReg;
870  MachineInstr *Inst = MRI->getVRegDef(SrcReg);
871  if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
872  NextReg = getIncomingRegForBlock(Inst, BB1);
873  // We track through PHI only once to avoid infinite loop.
874  BB1 = nullptr;
875  }
876  else if (Inst->isFullCopy())
877  NextReg = Inst->getOperand(1).getReg();
878  if (NextReg == SrcReg || !TargetRegisterInfo::isVirtualRegister(NextReg))
879  break;
880  SrcReg = NextReg;
881  }
882  return SrcReg;
883 }
884 
885 static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
886  MachineBasicBlock *&PredMBB,
887  MachineBasicBlock *&MBBtoMoveCmp,
888  MachineRegisterInfo *MRI) {
889 
890  auto isEligibleBB = [&](MachineBasicBlock &BB) {
891  auto BII = BB.getFirstInstrTerminator();
892  // We optimize BBs ending with a conditional branch.
893  // We check only for BCC here, not BCCLR, because BCCLR
894  // will be formed only later in the pipeline.
895  if (BB.succ_size() == 2 &&
896  BII != BB.instr_end() &&
897  (*BII).getOpcode() == PPC::BCC &&
898  (*BII).getOperand(1).isReg()) {
899  // We optimize only if the condition code is used only by one BCC.
900  unsigned CndReg = (*BII).getOperand(1).getReg();
902  !MRI->hasOneNonDBGUse(CndReg))
903  return false;
904 
905  MachineInstr *CMPI = MRI->getVRegDef(CndReg);
906  // We assume compare and branch are in the same BB for ease of analysis.
907  if (CMPI->getParent() != &BB)
908  return false;
909 
910  // We skip this BB if a physical register is used in comparison.
911  for (MachineOperand &MO : CMPI->operands())
913  return false;
914 
915  return true;
916  }
917  return false;
918  };
919 
920  // If this BB has more than one successor, we can create a new BB and
921  // move the compare instruction in the new BB.
922  // So far, we do not move compare instruction to a BB having multiple
923  // successors to avoid potentially increasing code size.
924  auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
925  return BB.succ_size() == 1;
926  };
927 
928  if (!isEligibleBB(MBB))
929  return false;
930 
931  unsigned NumPredBBs = MBB.pred_size();
932  if (NumPredBBs == 1) {
933  MachineBasicBlock *TmpMBB = *MBB.pred_begin();
934  if (isEligibleBB(*TmpMBB)) {
935  PredMBB = TmpMBB;
936  MBBtoMoveCmp = nullptr;
937  return true;
938  }
939  }
940  else if (NumPredBBs == 2) {
941  // We check for partially redundant case.
942  // So far, we support cases with only two predecessors
943  // to avoid increasing the number of instructions.
945  MachineBasicBlock *Pred1MBB = *PI;
946  MachineBasicBlock *Pred2MBB = *(PI+1);
947 
948  if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
949  // We assume Pred1MBB is the BB containing the compare to be merged and
950  // Pred2MBB is the BB to which we will append a compare instruction.
951  // Hence we can proceed as is.
952  }
953  else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
954  // We need to swap Pred1MBB and Pred2MBB to canonicalize.
955  std::swap(Pred1MBB, Pred2MBB);
956  }
957  else return false;
958 
959  // Here, Pred2MBB is the BB to which we need to append a compare inst.
960  // We cannot move the compare instruction if operands are not available
961  // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
963  MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
964  for (int I = 1; I <= 2; I++)
965  if (CMPI->getOperand(I).isReg()) {
966  MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
967  if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
968  return false;
969  }
970 
971  PredMBB = Pred1MBB;
972  MBBtoMoveCmp = Pred2MBB;
973  return true;
974  }
975 
976  return false;
977 }
978 
979 // This function will iterate over the input map containing a pair of TOC save
980 // instruction and a flag. The flag will be set to false if the TOC save is
981 // proven redundant. This function will erase from the basic block all the TOC
982 // saves marked as redundant.
983 bool PPCMIPeephole::eliminateRedundantTOCSaves(
984  std::map<MachineInstr *, bool> &TOCSaves) {
985  bool Simplified = false;
986  int NumKept = 0;
987  for (auto TOCSave : TOCSaves) {
988  if (!TOCSave.second) {
989  TOCSave.first->eraseFromParent();
990  RemoveTOCSave++;
991  Simplified = true;
992  } else {
993  NumKept++;
994  }
995  }
996 
997  if (NumKept > 1)
998  MultiTOCSaves++;
999 
1000  return Simplified;
1001 }
1002 
1003 // If multiple conditional branches are executed based on the (essentially)
1004 // same comparison, we merge compare instructions into one and make multiple
1005 // conditional branches on this comparison.
1006 // For example,
1007 // if (a == 0) { ... }
1008 // else if (a < 0) { ... }
1009 // can be executed by one compare and two conditional branches instead of
1010 // two pairs of a compare and a conditional branch.
1011 //
1012 // This method merges two compare instructions in two MBBs and modifies the
1013 // compare and conditional branch instructions if needed.
1014 // For the above example, the input for this pass looks like:
1015 // cmplwi r3, 0
1016 // beq 0, .LBB0_3
1017 // cmpwi r3, -1
1018 // bgt 0, .LBB0_4
1019 // So, before merging two compares, we need to modify these instructions as
1020 // cmpwi r3, 0 ; cmplwi and cmpwi yield same result for beq
1021 // beq 0, .LBB0_3
1022 // cmpwi r3, 0 ; greather than -1 means greater or equal to 0
1023 // bge 0, .LBB0_4
1024 
1025 bool PPCMIPeephole::eliminateRedundantCompare(void) {
1026  bool Simplified = false;
1027 
1028  for (MachineBasicBlock &MBB2 : *MF) {
1029  MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;
1030 
1031  // For fully redundant case, we select two basic blocks MBB1 and MBB2
1032  // as an optimization target if
1033  // - both MBBs end with a conditional branch,
1034  // - MBB1 is the only predecessor of MBB2, and
1035  // - compare does not take a physical register as a operand in both MBBs.
1036  // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
1037  //
1038  // As partially redundant case, we additionally handle if MBB2 has one
1039  // additional predecessor, which has only one successor (MBB2).
1040  // In this case, we move the compare instruction originally in MBB2 into
1041  // MBBtoMoveCmp. This partially redundant case is typically appear by
1042  // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
1043  //
1044  // Overview of CFG of related basic blocks
1045  // Fully redundant case Partially redundant case
1046  // -------- ---------------- --------
1047  // | MBB1 | (w/ 2 succ) | MBBtoMoveCmp | | MBB1 | (w/ 2 succ)
1048  // -------- ---------------- --------
1049  // | \ (w/ 1 succ) \ | \
1050  // | \ \ | \
1051  // | \ |
1052  // -------- --------
1053  // | MBB2 | (w/ 1 pred | MBB2 | (w/ 2 pred
1054  // -------- and 2 succ) -------- and 2 succ)
1055  // | \ | \
1056  // | \ | \
1057  //
1058  if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
1059  continue;
1060 
1061  MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
1062  MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());
1063 
1064  MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
1065  MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
1066  bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);
1067 
1068  // We cannot optimize an unsupported compare opcode or
1069  // a mix of 32-bit and 64-bit comaprisons
1070  if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
1071  !isSupportedCmpOp(CMPI2->getOpcode()) ||
1072  is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
1073  continue;
1074 
1075  unsigned NewOpCode = 0;
1076  unsigned NewPredicate1 = 0, NewPredicate2 = 0;
1077  int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
1078  bool SwapOperands = false;
1079 
1080  if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
1081  // Typically, unsigned comparison is used for equality check, but
1082  // we replace it with a signed comparison if the comparison
1083  // to be merged is a signed comparison.
1084  // In other cases of opcode mismatch, we cannot optimize this.
1085 
1086  // We cannot change opcode when comparing against an immediate
1087  // if the most significant bit of the immediate is one
1088  // due to the difference in sign extension.
1089  auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
1090  if (!I->getOperand(2).isImm())
1091  return false;
1092  int16_t Imm = (int16_t)I->getOperand(2).getImm();
1093  return Imm < 0;
1094  };
1095 
1096  if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
1097  CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
1098  NewOpCode = CMPI1->getOpcode();
1099  else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
1100  getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
1101  NewOpCode = CMPI2->getOpcode();
1102  else continue;
1103  }
1104 
1105  if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
1106  // In case of comparisons between two registers, these two registers
1107  // must be same to merge two comparisons.
1108  unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1109  nullptr, nullptr, MRI);
1110  unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
1111  nullptr, nullptr, MRI);
1112  unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1113  MBB1, &MBB2, MRI);
1114  unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
1115  MBB1, &MBB2, MRI);
1116 
1117  if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
1118  // Same pair of registers in the same order; ready to merge as is.
1119  }
1120  else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
1121  // Same pair of registers in different order.
1122  // We reverse the predicate to merge compare instructions.
1123  PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
1124  NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
1125  // In case of partial redundancy, we need to swap operands
1126  // in another compare instruction.
1127  SwapOperands = true;
1128  }
1129  else continue;
1130  }
1131  else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
1132  // In case of comparisons between a register and an immediate,
1133  // the operand register must be same for two compare instructions.
1134  unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1135  nullptr, nullptr, MRI);
1136  unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1137  MBB1, &MBB2, MRI);
1138  if (Cmp1Operand1 != Cmp2Operand1)
1139  continue;
1140 
1141  NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
1142  NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();
1143 
1144  // If immediate are not same, we try to adjust by changing predicate;
1145  // e.g. GT imm means GE (imm+1).
1146  if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
1147  int Diff = Imm1 - Imm2;
1148  if (Diff < -2 || Diff > 2)
1149  continue;
1150 
1151  unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
1152  unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
1153  unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
1154  unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
1155  if (Diff == 2) {
1156  if (PredToInc2 && PredToDec1) {
1157  NewPredicate2 = PredToInc2;
1158  NewPredicate1 = PredToDec1;
1159  NewImm2++;
1160  NewImm1--;
1161  }
1162  }
1163  else if (Diff == 1) {
1164  if (PredToInc2) {
1165  NewImm2++;
1166  NewPredicate2 = PredToInc2;
1167  }
1168  else if (PredToDec1) {
1169  NewImm1--;
1170  NewPredicate1 = PredToDec1;
1171  }
1172  }
1173  else if (Diff == -1) {
1174  if (PredToDec2) {
1175  NewImm2--;
1176  NewPredicate2 = PredToDec2;
1177  }
1178  else if (PredToInc1) {
1179  NewImm1++;
1180  NewPredicate1 = PredToInc1;
1181  }
1182  }
1183  else if (Diff == -2) {
1184  if (PredToDec2 && PredToInc1) {
1185  NewPredicate2 = PredToDec2;
1186  NewPredicate1 = PredToInc1;
1187  NewImm2--;
1188  NewImm1++;
1189  }
1190  }
1191  }
1192 
1193  // We cannot merge two compares if the immediates are not same.
1194  if (NewImm2 != NewImm1)
1195  continue;
1196  }
1197 
1198  LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
1199  LLVM_DEBUG(CMPI1->dump());
1200  LLVM_DEBUG(BI1->dump());
1201  LLVM_DEBUG(CMPI2->dump());
1202  LLVM_DEBUG(BI2->dump());
1203 
1204  // We adjust opcode, predicates and immediate as we determined above.
1205  if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
1206  CMPI1->setDesc(TII->get(NewOpCode));
1207  }
1208  if (NewPredicate1) {
1209  BI1->getOperand(0).setImm(NewPredicate1);
1210  }
1211  if (NewPredicate2) {
1212  BI2->getOperand(0).setImm(NewPredicate2);
1213  }
1214  if (NewImm1 != Imm1) {
1215  CMPI1->getOperand(2).setImm(NewImm1);
1216  }
1217 
1218  if (IsPartiallyRedundant) {
1219  // We touch up the compare instruction in MBB2 and move it to
1220  // a previous BB to handle partially redundant case.
1221  if (SwapOperands) {
1222  unsigned Op1 = CMPI2->getOperand(1).getReg();
1223  unsigned Op2 = CMPI2->getOperand(2).getReg();
1224  CMPI2->getOperand(1).setReg(Op2);
1225  CMPI2->getOperand(2).setReg(Op1);
1226  }
1227  if (NewImm2 != Imm2)
1228  CMPI2->getOperand(2).setImm(NewImm2);
1229 
1230  for (int I = 1; I <= 2; I++) {
1231  if (CMPI2->getOperand(I).isReg()) {
1232  MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
1233  if (Inst->getParent() != &MBB2)
1234  continue;
1235 
1236  assert(Inst->getOpcode() == PPC::PHI &&
1237  "We cannot support if an operand comes from this BB.");
1238  unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
1239  CMPI2->getOperand(I).setReg(SrcReg);
1240  }
1241  }
1242  auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
1243  MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));
1244 
1245  DebugLoc DL = CMPI2->getDebugLoc();
1246  unsigned NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
1247  BuildMI(MBB2, MBB2.begin(), DL,
1248  TII->get(PPC::PHI), NewVReg)
1249  .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
1250  .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
1251  BI2->getOperand(1).setReg(NewVReg);
1252  }
1253  else {
1254  // We finally eliminate compare instruction in MBB2.
1255  BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
1256  CMPI2->eraseFromParent();
1257  }
1258  BI2->getOperand(1).setIsKill(true);
1259  BI1->getOperand(1).setIsKill(false);
1260 
1261  LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
1262  LLVM_DEBUG(CMPI1->dump());
1263  LLVM_DEBUG(BI1->dump());
1264  LLVM_DEBUG(BI2->dump());
1265  if (IsPartiallyRedundant) {
1266  LLVM_DEBUG(dbgs() << "The following compare is moved into "
1267  << printMBBReference(*MBBtoMoveCmp)
1268  << " to handle partial redundancy.\n");
1269  LLVM_DEBUG(CMPI2->dump());
1270  }
1271 
1272  Simplified = true;
1273  }
1274 
1275  return Simplified;
1276 }
1277 
1278 } // end default namespace
1279 
1280 INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
1281  "PowerPC MI Peephole Optimization", false, false)
1283  "PowerPC MI Peephole Optimization", false, false)
1284 
1285 char PPCMIPeephole::ID = 0;
1286 FunctionPass*
1287 llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
1288 
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
MachineBasicBlock * getMBB() const
This class represents lattice values for constants.
Definition: AllocatorList.h:24
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned Reg
void initializePPCMIPeepholePass(PassRegistry &)
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:459
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:189
bool isSignExtended(const MachineInstr &MI, const unsigned depth=0) const
Return true if the output of the instruction is always a sign-extended, i.e.
Definition: PPCInstrInfo.h:403
AnalysisUsage & addRequired()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:412
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE, "PowerPC MI Peephole Optimization", false, false) INITIALIZE_PASS_END(PPCMIPeephole
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool isTOCSaveMI(const MachineInstr &MI) const
bool isFullCopy() const
#define DEBUG_TYPE
static cl::opt< bool > EnableSExtElimination("ppc-eliminate-signext", cl::desc("enable elimination of sign-extensions"), cl::init(false), cl::Hidden)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static cl::opt< bool > ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true), cl::desc("Convert eligible reg+reg instructions to reg+imm"))
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
MachineInstrBundleIterator< MachineInstr > iterator
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static bool is64Bit(const char *name)
Represent the analysis usage information of a pass.
use_instr_iterator use_instr_begin(unsigned RegNo) const
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
std::vector< MachineBasicBlock * >::iterator pred_iterator
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
Definition: PPCPredicates.h:83
bool isDebugInstr() const
Definition: MachineInstr.h:999
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setIsKill(bool Val=true)
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:88
static cl::opt< bool > FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true), cl::desc("Iterate to a fixed point when attempting to " "convert reg-reg instructions to reg-imm"))
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
MachineOperand class - Representation of each machine instruction operand.
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineInstrBuilder MachineInstrBuilder & DefMI
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
int64_t getImm() const
unsigned pred_size() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringRef > StandardNames)
Initialize the set of available library functions based on the specified target triple.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:64
PowerPC MI Peephole Optimization
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB &#39;Other&#39; at the position From, and insert it into this MBB right before &#39;...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
void setReg(unsigned Reg)
Change the register this operand corresponds to.
#define I(x, y, z)
Definition: MD5.cpp:58
static cl::opt< bool > EnableZExtElimination("ppc-eliminate-zeroext", cl::desc("enable elimination of zero-extensions"), cl::init(false), cl::Hidden)
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool hasOneNonDBGUse(unsigned RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug instruction using the specified regis...
bool isReg() const
isReg - Tests if this is a MO_Register operand.
FunctionPass * createPPCMIPeepholePass()
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Definition: PPCPredicates.h:78
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
IRTranslator LLVM IR MI
void setRegClass(unsigned Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
void RemoveOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
#define LLVM_DEBUG(X)
Definition: Debug.h:123
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
const PPCRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
Definition: PPCInstrInfo.h:190
virtual unsigned lookThruCopyLike(unsigned SrcReg, const MachineRegisterInfo *MRI) const
Returns the original SrcReg unless it is the target of a copy-like operation, in which case we chain ...
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool convertToImmediateForm(MachineInstr &MI, MachineInstr **KilledDef=nullptr) const
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
XXPERMDI - The PPC XXPERMDI instruction.
bool isZeroExtended(const MachineInstr &MI, const unsigned depth=0) const
Return true if the output of the instruction is always zero-extended, i.e.
Definition: PPCInstrInfo.h:409
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions s...