LLVM  8.0.1
PPCISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a pattern matching instruction selector for PowerPC,
11 // converting from a legalized dag to a PPC dag.
12 //
13 //===----------------------------------------------------------------------===//
14 
17 #include "PPC.h"
18 #include "PPCISelLowering.h"
19 #include "PPCMachineFunctionInfo.h"
20 #include "PPCSubtarget.h"
21 #include "PPCTargetMachine.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Statistic.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/IR/GlobalValue.h"
45 #include "llvm/IR/InlineAsm.h"
46 #include "llvm/IR/InstrTypes.h"
47 #include "llvm/IR/Module.h"
48 #include "llvm/Support/Casting.h"
49 #include "llvm/Support/CodeGen.h"
51 #include "llvm/Support/Compiler.h"
52 #include "llvm/Support/Debug.h"
54 #include "llvm/Support/KnownBits.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstdint>
61 #include <iterator>
62 #include <limits>
63 #include <memory>
64 #include <new>
65 #include <tuple>
66 #include <utility>
67 
68 using namespace llvm;
69 
70 #define DEBUG_TYPE "ppc-codegen"
71 
72 STATISTIC(NumSextSetcc,
73  "Number of (sext(setcc)) nodes expanded into GPR sequence.");
74 STATISTIC(NumZextSetcc,
75  "Number of (zext(setcc)) nodes expanded into GPR sequence.");
76 STATISTIC(SignExtensionsAdded,
77  "Number of sign extensions for compare inputs added.");
78 STATISTIC(ZeroExtensionsAdded,
79  "Number of zero extensions for compare inputs added.");
80 STATISTIC(NumLogicOpsOnComparison,
81  "Number of logical ops on i1 values calculated in GPR.");
82 STATISTIC(OmittedForNonExtendUses,
83  "Number of compares not eliminated as they have non-extending uses.");
84 STATISTIC(NumP9Setb,
85  "Number of compares lowered to setb.");
86 
87 // FIXME: Remove this once the bug has been fixed!
88 cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
89 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
90 
91 static cl::opt<bool>
92  UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
93  cl::desc("use aggressive ppc isel for bit permutations"),
94  cl::Hidden);
96  "ppc-bit-perm-rewriter-stress-rotates",
97  cl::desc("stress rotate selection in aggressive ppc isel for "
98  "bit permutations"),
99  cl::Hidden);
100 
102  "ppc-use-branch-hint", cl::init(true),
103  cl::desc("Enable static hinting of branches on ppc"),
104  cl::Hidden);
105 
107  "ppc-tls-opt", cl::init(true),
108  cl::desc("Enable tls optimization peephole"),
109  cl::Hidden);
110 
114 
116  "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
117  cl::desc("Specify the types of comparisons to emit GPR-only code for."),
118  cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
119  clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
120  clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
121  clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
122  clEnumValN(ICGPR_NonExtIn, "nonextin",
123  "Only comparisons where inputs don't need [sz]ext."),
124  clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
125  clEnumValN(ICGPR_ZextI32, "zexti32",
126  "Only i32 comparisons with zext result."),
127  clEnumValN(ICGPR_ZextI64, "zexti64",
128  "Only i64 comparisons with zext result."),
129  clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
130  clEnumValN(ICGPR_SextI32, "sexti32",
131  "Only i32 comparisons with sext result."),
132  clEnumValN(ICGPR_SextI64, "sexti64",
133  "Only i64 comparisons with sext result.")));
134 namespace {
135 
136  //===--------------------------------------------------------------------===//
137  /// PPCDAGToDAGISel - PPC specific code to select PPC machine
138  /// instructions for SelectionDAG operations.
139  ///
140  class PPCDAGToDAGISel : public SelectionDAGISel {
141  const PPCTargetMachine &TM;
142  const PPCSubtarget *PPCSubTarget;
143  const PPCTargetLowering *PPCLowering;
144  unsigned GlobalBaseReg;
145 
146  public:
147  explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
148  : SelectionDAGISel(tm, OptLevel), TM(tm) {}
149 
150  bool runOnMachineFunction(MachineFunction &MF) override {
151  // Make sure we re-emit a set of the global base reg if necessary
152  GlobalBaseReg = 0;
153  PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
154  PPCLowering = PPCSubTarget->getTargetLowering();
156 
157  if (!PPCSubTarget->isSVR4ABI())
158  InsertVRSaveCode(MF);
159 
160  return true;
161  }
162 
163  void PreprocessISelDAG() override;
164  void PostprocessISelDAG() override;
165 
166  /// getI16Imm - Return a target constant with the specified value, of type
167  /// i16.
168  inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
169  return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
170  }
171 
172  /// getI32Imm - Return a target constant with the specified value, of type
173  /// i32.
174  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
175  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
176  }
177 
178  /// getI64Imm - Return a target constant with the specified value, of type
179  /// i64.
180  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
181  return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
182  }
183 
184  /// getSmallIPtrImm - Return a target constant of pointer type.
185  inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
186  return CurDAG->getTargetConstant(
187  Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
188  }
189 
190  /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
191  /// rotate and mask opcode and mask operation.
192  static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
193  unsigned &SH, unsigned &MB, unsigned &ME);
194 
195  /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
196  /// base register. Return the virtual register that holds this value.
197  SDNode *getGlobalBaseReg();
198 
199  void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);
200 
201  // Select - Convert the specified operand from a target-independent to a
202  // target-specific node if it hasn't already been changed.
203  void Select(SDNode *N) override;
204 
205  bool tryBitfieldInsert(SDNode *N);
206  bool tryBitPermutation(SDNode *N);
207  bool tryIntCompareInGPR(SDNode *N);
208 
209  // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
210  // an X-Form load instruction with the offset being a relocation coming from
211  // the PPCISD::ADD_TLS.
212  bool tryTLSXFormLoad(LoadSDNode *N);
213  // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
214  // an X-Form store instruction with the offset being a relocation coming from
215  // the PPCISD::ADD_TLS.
216  bool tryTLSXFormStore(StoreSDNode *N);
217  /// SelectCC - Select a comparison of the specified values with the
218  /// specified condition code, returning the CR# of the expression.
219  SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
220  const SDLoc &dl);
221 
222  /// SelectAddrImm - Returns true if the address N can be represented by
223  /// a base register plus a signed 16-bit displacement [r+imm].
224  bool SelectAddrImm(SDValue N, SDValue &Disp,
225  SDValue &Base) {
226  return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
227  }
228 
229  /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
230  /// immediate field. Note that the operand at this point is already the
231  /// result of a prior SelectAddressRegImm call.
232  bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
233  if (N.getOpcode() == ISD::TargetConstant ||
235  Out = N;
236  return true;
237  }
238 
239  return false;
240  }
241 
242  /// SelectAddrIdx - Given the specified addressed, check to see if it can be
243  /// represented as an indexed [r+r] operation. Returns false if it can
244  /// be represented by [r+imm], which are preferred.
245  bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
246  return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
247  }
248 
249  /// SelectAddrIdxOnly - Given the specified addressed, force it to be
250  /// represented as an indexed [r+r] operation.
251  bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
252  return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
253  }
254 
255  /// SelectAddrImmX4 - Returns true if the address N can be represented by
256  /// a base register plus a signed 16-bit displacement that is a multiple of 4.
257  /// Suitable for use by STD and friends.
258  bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
259  return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
260  }
261 
262  bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
263  return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
264  }
265 
266  // Select an address into a single register.
267  bool SelectAddr(SDValue N, SDValue &Base) {
268  Base = N;
269  return true;
270  }
271 
272  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
273  /// inline asm expressions. It is always correct to compute the value into
274  /// a register. The case of adding a (possibly relocatable) constant to a
275  /// register can be improved, but it is wrong to substitute Reg+Reg for
276  /// Reg in an asm, because the load or store opcode would have to change.
277  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
278  unsigned ConstraintID,
279  std::vector<SDValue> &OutOps) override {
280  switch(ConstraintID) {
281  default:
282  errs() << "ConstraintID: " << ConstraintID << "\n";
283  llvm_unreachable("Unexpected asm memory constraint");
291  // We need to make sure that this one operand does not end up in r0
292  // (because we might end up lowering this as 0(%op)).
293  const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
294  const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
295  SDLoc dl(Op);
296  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
297  SDValue NewOp =
298  SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
299  dl, Op.getValueType(),
300  Op, RC), 0);
301 
302  OutOps.push_back(NewOp);
303  return false;
304  }
305  return true;
306  }
307 
308  void InsertVRSaveCode(MachineFunction &MF);
309 
310  StringRef getPassName() const override {
311  return "PowerPC DAG->DAG Pattern Instruction Selection";
312  }
313 
314 // Include the pieces autogenerated from the target description.
315 #include "PPCGenDAGISel.inc"
316 
317 private:
318  bool trySETCC(SDNode *N);
319 
320  void PeepholePPC64();
321  void PeepholePPC64ZExt();
322  void PeepholeCROps();
323 
324  SDValue combineToCMPB(SDNode *N);
325  void foldBoolExts(SDValue &Res, SDNode *&N);
326 
327  bool AllUsersSelectZero(SDNode *N);
328  void SwapAllSelectUsers(SDNode *N);
329 
330  bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
331  void transferMemOperands(SDNode *N, SDNode *Result);
332  };
333 
334 } // end anonymous namespace
335 
336 /// InsertVRSaveCode - Once the entire function has been instruction selected,
337 /// all virtual registers are created and all machine instructions are built,
338 /// check to see if we need to save/restore VRSAVE. If so, do it.
339 void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
340  // Check to see if this function uses vector registers, which means we have to
341  // save and restore the VRSAVE register and update it with the regs we use.
342  //
343  // In this case, there will be virtual registers of vector type created
344  // by the scheduler. Detect them now.
345  bool HasVectorVReg = false;
346  for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
348  if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
349  HasVectorVReg = true;
350  break;
351  }
352  }
353  if (!HasVectorVReg) return; // nothing to do.
354 
355  // If we have a vector register, we want to emit code into the entry and exit
356  // blocks to save and restore the VRSAVE register. We do this here (instead
357  // of marking all vector instructions as clobbering VRSAVE) for two reasons:
358  //
359  // 1. This (trivially) reduces the load on the register allocator, by not
360  // having to represent the live range of the VRSAVE register.
361  // 2. This (more significantly) allows us to create a temporary virtual
362  // register to hold the saved VRSAVE value, allowing this temporary to be
363  // register allocated, instead of forcing it to be spilled to the stack.
364 
365  // Create two vregs - one to hold the VRSAVE register that is live-in to the
366  // function and one for the value after having bits or'd into it.
367  unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
368  unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
369 
370  const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
371  MachineBasicBlock &EntryBB = *Fn.begin();
372  DebugLoc dl;
373  // Emit the following code into the entry block:
374  // InVRSAVE = MFVRSAVE
375  // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
376  // MTVRSAVE UpdatedVRSAVE
377  MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point
378  BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
379  BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
380  UpdatedVRSAVE).addReg(InVRSAVE);
381  BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
382 
383  // Find all return blocks, outputting a restore in each epilog.
384  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
385  if (BB->isReturnBlock()) {
386  IP = BB->end(); --IP;
387 
388  // Skip over all terminator instructions, which are part of the return
389  // sequence.
391  while (I2 != BB->begin() && (--I2)->isTerminator())
392  IP = I2;
393 
394  // Emit: MTVRSAVE InVRSave
395  BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
396  }
397  }
398 }
399 
400 /// getGlobalBaseReg - Output the instructions required to put the
401 /// base address to use for accessing globals into a register.
402 ///
403 SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
404  if (!GlobalBaseReg) {
405  const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
406  // Insert the set of GlobalBaseReg into the first MBB of the function
407  MachineBasicBlock &FirstMBB = MF->front();
408  MachineBasicBlock::iterator MBBI = FirstMBB.begin();
409  const Module *M = MF->getFunction().getParent();
410  DebugLoc dl;
411 
412  if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
413  if (PPCSubTarget->isTargetELF()) {
414  GlobalBaseReg = PPC::R30;
415  if (M->getPICLevel() == PICLevel::SmallPIC) {
416  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
417  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
418  MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
419  } else {
420  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
421  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
422  unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
423  BuildMI(FirstMBB, MBBI, dl,
424  TII.get(PPC::UpdateGBR), GlobalBaseReg)
425  .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
426  MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
427  }
428  } else {
429  GlobalBaseReg =
430  RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
431  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
432  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
433  }
434  } else {
435  // We must ensure that this sequence is dominated by the prologue.
436  // FIXME: This is a bit of a big hammer since we don't get the benefits
437  // of shrink-wrapping whenever we emit this instruction. Considering
438  // this is used in any function where we emit a jump table, this may be
439  // a significant limitation. We should consider inserting this in the
440  // block where it is used and then commoning this sequence up if it
441  // appears in multiple places.
442  // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
443  // MovePCtoLR8.
444  MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
445  GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
446  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
447  BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
448  }
449  }
450  return CurDAG->getRegister(GlobalBaseReg,
451  PPCLowering->getPointerTy(CurDAG->getDataLayout()))
452  .getNode();
453 }
454 
455 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
456 /// operand. If so Imm will receive the 32-bit value.
457 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
458  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
459  Imm = cast<ConstantSDNode>(N)->getZExtValue();
460  return true;
461  }
462  return false;
463 }
464 
465 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant
466 /// operand. If so Imm will receive the 64-bit value.
467 static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
468  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
469  Imm = cast<ConstantSDNode>(N)->getZExtValue();
470  return true;
471  }
472  return false;
473 }
474 
475 // isInt32Immediate - This method tests to see if a constant operand.
476 // If so Imm will receive the 32 bit value.
477 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
478  return isInt32Immediate(N.getNode(), Imm);
479 }
480 
481 /// isInt64Immediate - This method tests to see if the value is a 64-bit
482 /// constant operand. If so Imm will receive the 64-bit value.
483 static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
484  return isInt64Immediate(N.getNode(), Imm);
485 }
486 
487 static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
488  const SDValue &DestMBB) {
489  assert(isa<BasicBlockSDNode>(DestMBB));
490 
491  if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
492 
493  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
494  const Instruction *BBTerm = BB->getTerminator();
495 
496  if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
497 
498  const BasicBlock *TBB = BBTerm->getSuccessor(0);
499  const BasicBlock *FBB = BBTerm->getSuccessor(1);
500 
501  auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB);
502  auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB);
503 
504  // We only want to handle cases which are easy to predict at static time, e.g.
505  // C++ throw statement, that is very likely not taken, or calling never
506  // returned function, e.g. stdlib exit(). So we set Threshold to filter
507  // unwanted cases.
508  //
509  // Below is LLVM branch weight table, we only want to handle case 1, 2
510  //
511  // Case Taken:Nontaken Example
512  // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
513  // 2. Invoke-terminating 1:1048575
514  // 3. Coldblock 4:64 __builtin_expect
515  // 4. Loop Branch 124:4 For loop
516  // 5. PH/ZH/FPH 20:12
517  const uint32_t Threshold = 10000;
518 
519  if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
520  return PPC::BR_NO_HINT;
521 
522  LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName()
523  << "::" << BB->getName() << "'\n"
524  << " -> " << TBB->getName() << ": " << TProb << "\n"
525  << " -> " << FBB->getName() << ": " << FProb << "\n");
526 
527  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
528 
529  // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
530  // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
531  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
532  std::swap(TProb, FProb);
533 
534  return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
535 }
536 
537 // isOpcWithIntImmediate - This method tests to see if the node is a specific
538 // opcode and that it has a immediate integer right operand.
539 // If so Imm will receive the 32 bit value.
540 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
541  return N->getOpcode() == Opc
542  && isInt32Immediate(N->getOperand(1).getNode(), Imm);
543 }
544 
545 void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
546  SDLoc dl(SN);
547  int FI = cast<FrameIndexSDNode>(N)->getIndex();
548  SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
549  unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
550  if (SN->hasOneUse())
551  CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
552  getSmallIPtrImm(Offset, dl));
553  else
554  ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
555  getSmallIPtrImm(Offset, dl)));
556 }
557 
558 bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
559  bool isShiftMask, unsigned &SH,
560  unsigned &MB, unsigned &ME) {
561  // Don't even go down this path for i64, since different logic will be
562  // necessary for rldicl/rldicr/rldimi.
563  if (N->getValueType(0) != MVT::i32)
564  return false;
565 
566  unsigned Shift = 32;
567  unsigned Indeterminant = ~0; // bit mask marking indeterminant results
568  unsigned Opcode = N->getOpcode();
569  if (N->getNumOperands() != 2 ||
570  !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
571  return false;
572 
573  if (Opcode == ISD::SHL) {
574  // apply shift left to mask if it comes first
575  if (isShiftMask) Mask = Mask << Shift;
576  // determine which bits are made indeterminant by shift
577  Indeterminant = ~(0xFFFFFFFFu << Shift);
578  } else if (Opcode == ISD::SRL) {
579  // apply shift right to mask if it comes first
580  if (isShiftMask) Mask = Mask >> Shift;
581  // determine which bits are made indeterminant by shift
582  Indeterminant = ~(0xFFFFFFFFu >> Shift);
583  // adjust for the left rotate
584  Shift = 32 - Shift;
585  } else if (Opcode == ISD::ROTL) {
586  Indeterminant = 0;
587  } else {
588  return false;
589  }
590 
591  // if the mask doesn't intersect any Indeterminant bits
592  if (Mask && !(Mask & Indeterminant)) {
593  SH = Shift & 31;
594  // make sure the mask is still a mask (wrap arounds may not be)
595  return isRunOfOnes(Mask, MB, ME);
596  }
597  return false;
598 }
599 
600 bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
601  SDValue Base = ST->getBasePtr();
602  if (Base.getOpcode() != PPCISD::ADD_TLS)
603  return false;
604  SDValue Offset = ST->getOffset();
605  if (!Offset.isUndef())
606  return false;
607 
608  SDLoc dl(ST);
609  EVT MemVT = ST->getMemoryVT();
610  EVT RegVT = ST->getValue().getValueType();
611 
612  unsigned Opcode;
613  switch (MemVT.getSimpleVT().SimpleTy) {
614  default:
615  return false;
616  case MVT::i8: {
617  Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
618  break;
619  }
620  case MVT::i16: {
621  Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
622  break;
623  }
624  case MVT::i32: {
625  Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
626  break;
627  }
628  case MVT::i64: {
629  Opcode = PPC::STDXTLS;
630  break;
631  }
632  }
633  SDValue Chain = ST->getChain();
634  SDVTList VTs = ST->getVTList();
635  SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
636  Chain};
637  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
638  transferMemOperands(ST, MN);
639  ReplaceNode(ST, MN);
640  return true;
641 }
642 
643 bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
644  SDValue Base = LD->getBasePtr();
645  if (Base.getOpcode() != PPCISD::ADD_TLS)
646  return false;
647  SDValue Offset = LD->getOffset();
648  if (!Offset.isUndef())
649  return false;
650 
651  SDLoc dl(LD);
652  EVT MemVT = LD->getMemoryVT();
653  EVT RegVT = LD->getValueType(0);
654  unsigned Opcode;
655  switch (MemVT.getSimpleVT().SimpleTy) {
656  default:
657  return false;
658  case MVT::i8: {
659  Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
660  break;
661  }
662  case MVT::i16: {
663  Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
664  break;
665  }
666  case MVT::i32: {
667  Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
668  break;
669  }
670  case MVT::i64: {
671  Opcode = PPC::LDXTLS;
672  break;
673  }
674  }
675  SDValue Chain = LD->getChain();
676  SDVTList VTs = LD->getVTList();
677  SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
678  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
679  transferMemOperands(LD, MN);
680  ReplaceNode(LD, MN);
681  return true;
682 }
683 
684 /// Turn an or of two masked values into the rotate left word immediate then
685 /// mask insert (rlwimi) instruction.
686 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
687  SDValue Op0 = N->getOperand(0);
688  SDValue Op1 = N->getOperand(1);
689  SDLoc dl(N);
690 
691  KnownBits LKnown = CurDAG->computeKnownBits(Op0);
692  KnownBits RKnown = CurDAG->computeKnownBits(Op1);
693 
694  unsigned TargetMask = LKnown.Zero.getZExtValue();
695  unsigned InsertMask = RKnown.Zero.getZExtValue();
696 
697  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
698  unsigned Op0Opc = Op0.getOpcode();
699  unsigned Op1Opc = Op1.getOpcode();
700  unsigned Value, SH = 0;
701  TargetMask = ~TargetMask;
702  InsertMask = ~InsertMask;
703 
704  // If the LHS has a foldable shift and the RHS does not, then swap it to the
705  // RHS so that we can fold the shift into the insert.
706  if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
707  if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
708  Op0.getOperand(0).getOpcode() == ISD::SRL) {
709  if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
710  Op1.getOperand(0).getOpcode() != ISD::SRL) {
711  std::swap(Op0, Op1);
712  std::swap(Op0Opc, Op1Opc);
713  std::swap(TargetMask, InsertMask);
714  }
715  }
716  } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
717  if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
718  Op1.getOperand(0).getOpcode() != ISD::SRL) {
719  std::swap(Op0, Op1);
720  std::swap(Op0Opc, Op1Opc);
721  std::swap(TargetMask, InsertMask);
722  }
723  }
724 
725  unsigned MB, ME;
726  if (isRunOfOnes(InsertMask, MB, ME)) {
727  if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
728  isInt32Immediate(Op1.getOperand(1), Value)) {
729  Op1 = Op1.getOperand(0);
730  SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
731  }
732  if (Op1Opc == ISD::AND) {
733  // The AND mask might not be a constant, and we need to make sure that
734  // if we're going to fold the masking with the insert, all bits not
735  // know to be zero in the mask are known to be one.
736  KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
737  bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
738 
739  unsigned SHOpc = Op1.getOperand(0).getOpcode();
740  if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
741  isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
742  // Note that Value must be in range here (less than 32) because
743  // otherwise there would not be any bits set in InsertMask.
744  Op1 = Op1.getOperand(0).getOperand(0);
745  SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
746  }
747  }
748 
749  SH &= 31;
750  SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
751  getI32Imm(ME, dl) };
752  ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
753  return true;
754  }
755  }
756  return false;
757 }
758 
759 // Predict the number of instructions that would be generated by calling
760 // selectI64Imm(N).
761 static unsigned selectI64ImmInstrCountDirect(int64_t Imm) {
762  // Assume no remaining bits.
763  unsigned Remainder = 0;
764  // Assume no shift required.
765  unsigned Shift = 0;
766 
767  // If it can't be represented as a 32 bit value.
768  if (!isInt<32>(Imm)) {
769  Shift = countTrailingZeros<uint64_t>(Imm);
770  int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
771 
772  // If the shifted value fits 32 bits.
773  if (isInt<32>(ImmSh)) {
774  // Go with the shifted value.
775  Imm = ImmSh;
776  } else {
777  // Still stuck with a 64 bit value.
778  Remainder = Imm;
779  Shift = 32;
780  Imm >>= 32;
781  }
782  }
783 
784  // Intermediate operand.
785  unsigned Result = 0;
786 
787  // Handle first 32 bits.
788  unsigned Lo = Imm & 0xFFFF;
789 
790  // Simple value.
791  if (isInt<16>(Imm)) {
792  // Just the Lo bits.
793  ++Result;
794  } else if (Lo) {
795  // Handle the Hi bits and Lo bits.
796  Result += 2;
797  } else {
798  // Just the Hi bits.
799  ++Result;
800  }
801 
802  // If no shift, we're done.
803  if (!Shift) return Result;
804 
805  // If Hi word == Lo word,
806  // we can use rldimi to insert the Lo word into Hi word.
807  if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
808  ++Result;
809  return Result;
810  }
811 
812  // Shift for next step if the upper 32-bits were not zero.
813  if (Imm)
814  ++Result;
815 
816  // Add in the last bits as required.
817  if ((Remainder >> 16) & 0xFFFF)
818  ++Result;
819  if (Remainder & 0xFFFF)
820  ++Result;
821 
822  return Result;
823 }
824 
/// Rotate the 64-bit value Imm left by R bits.
/// The rotate amount is taken modulo 64, so R == 0 (and any multiple of 64)
/// returns Imm unchanged instead of evaluating an out-of-range shift, which
/// would be undefined behaviour.
static uint64_t Rot64(uint64_t Imm, unsigned R) {
  R &= 63;
  if (R == 0)
    return Imm;
  return (Imm << R) | (Imm >> (64 - R));
}
828 
829 static unsigned selectI64ImmInstrCount(int64_t Imm) {
830  unsigned Count = selectI64ImmInstrCountDirect(Imm);
831 
832  // If the instruction count is 1 or 2, we do not need further analysis
833  // since rotate + load constant requires at least 2 instructions.
834  if (Count <= 2)
835  return Count;
836 
837  for (unsigned r = 1; r < 63; ++r) {
838  uint64_t RImm = Rot64(Imm, r);
839  unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
840  Count = std::min(Count, RCount);
841 
842  // See comments in selectI64Imm for an explanation of the logic below.
843  unsigned LS = findLastSet(RImm);
844  if (LS != r-1)
845  continue;
846 
847  uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
848  uint64_t RImmWithOnes = RImm | OnesMask;
849 
850  RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
851  Count = std::min(Count, RCount);
852  }
853 
854  return Count;
855 }
856 
857 // Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount
858 // (above) needs to be kept in sync with this function.
859 static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
860  int64_t Imm) {
861  // Assume no remaining bits.
862  unsigned Remainder = 0;
863  // Assume no shift required.
864  unsigned Shift = 0;
865 
866  // If it can't be represented as a 32 bit value.
867  if (!isInt<32>(Imm)) {
868  Shift = countTrailingZeros<uint64_t>(Imm);
869  int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
870 
871  // If the shifted value fits 32 bits.
872  if (isInt<32>(ImmSh)) {
873  // Go with the shifted value.
874  Imm = ImmSh;
875  } else {
876  // Still stuck with a 64 bit value.
877  Remainder = Imm;
878  Shift = 32;
879  Imm >>= 32;
880  }
881  }
882 
883  // Intermediate operand.
884  SDNode *Result;
885 
886  // Handle first 32 bits.
887  unsigned Lo = Imm & 0xFFFF;
888  unsigned Hi = (Imm >> 16) & 0xFFFF;
889 
890  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
891  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
892  };
893 
894  // Simple value.
895  if (isInt<16>(Imm)) {
896  uint64_t SextImm = SignExtend64(Lo, 16);
897  SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
898  // Just the Lo bits.
899  Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
900  } else if (Lo) {
901  // Handle the Hi bits.
902  unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
903  Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
904  // And Lo bits.
905  Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
906  SDValue(Result, 0), getI32Imm(Lo));
907  } else {
908  // Just the Hi bits.
909  Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
910  }
911 
912  // If no shift, we're done.
913  if (!Shift) return Result;
914 
915  // If Hi word == Lo word,
916  // we can use rldimi to insert the Lo word into Hi word.
917  if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
918  SDValue Ops[] =
919  { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)};
920  return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
921  }
922 
923  // Shift for next step if the upper 32-bits were not zero.
924  if (Imm) {
925  Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
926  SDValue(Result, 0),
927  getI32Imm(Shift),
928  getI32Imm(63 - Shift));
929  }
930 
931  // Add in the last bits as required.
932  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
933  Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
934  SDValue(Result, 0), getI32Imm(Hi));
935  }
936  if ((Lo = Remainder & 0xFFFF)) {
937  Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
938  SDValue(Result, 0), getI32Imm(Lo));
939  }
940 
941  return Result;
942 }
943 
944 static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl,
945  int64_t Imm) {
946  unsigned Count = selectI64ImmInstrCountDirect(Imm);
947 
948  // If the instruction count is 1 or 2, we do not need further analysis
949  // since rotate + load constant requires at least 2 instructions.
950  if (Count <= 2)
951  return selectI64ImmDirect(CurDAG, dl, Imm);
952 
953  unsigned RMin = 0;
954 
955  int64_t MatImm;
956  unsigned MaskEnd;
957 
958  for (unsigned r = 1; r < 63; ++r) {
959  uint64_t RImm = Rot64(Imm, r);
960  unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
961  if (RCount < Count) {
962  Count = RCount;
963  RMin = r;
964  MatImm = RImm;
965  MaskEnd = 63;
966  }
967 
968  // If the immediate to generate has many trailing zeros, it might be
969  // worthwhile to generate a rotated value with too many leading ones
970  // (because that's free with li/lis's sign-extension semantics), and then
971  // mask them off after rotation.
972 
973  unsigned LS = findLastSet(RImm);
974  // We're adding (63-LS) higher-order ones, and we expect to mask them off
975  // after performing the inverse rotation by (64-r). So we need that:
976  // 63-LS == 64-r => LS == r-1
977  if (LS != r-1)
978  continue;
979 
980  uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
981  uint64_t RImmWithOnes = RImm | OnesMask;
982 
983  RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
984  if (RCount < Count) {
985  Count = RCount;
986  RMin = r;
987  MatImm = RImmWithOnes;
988  MaskEnd = LS;
989  }
990  }
991 
992  if (!RMin)
993  return selectI64ImmDirect(CurDAG, dl, Imm);
994 
995  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
996  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
997  };
998 
999  SDValue Val = SDValue(selectI64ImmDirect(CurDAG, dl, MatImm), 0);
1000  return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val,
1001  getI32Imm(64 - RMin), getI32Imm(MaskEnd));
1002 }
1003 
1004 static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
1005  unsigned MaxTruncation = 0;
1006  // Cannot use range-based for loop here as we need the actual use (i.e. we
1007  // need the operand number corresponding to the use). A range-based for
1008  // will unbox the use and provide an SDNode*.
1009  for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
1010  Use != UseEnd; ++Use) {
1011  unsigned Opc =
1012  Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
1013  switch (Opc) {
1014  default: return 0;
1015  case ISD::TRUNCATE:
1016  if (Use->isMachineOpcode())
1017  return 0;
1018  MaxTruncation =
1019  std::max(MaxTruncation, Use->getValueType(0).getSizeInBits());
1020  continue;
1021  case ISD::STORE: {
1022  if (Use->isMachineOpcode())
1023  return 0;
1024  StoreSDNode *STN = cast<StoreSDNode>(*Use);
1025  unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
1026  if (MemVTSize == 64 || Use.getOperandNo() != 0)
1027  return 0;
1028  MaxTruncation = std::max(MaxTruncation, MemVTSize);
1029  continue;
1030  }
1031  case PPC::STW8:
1032  case PPC::STWX8:
1033  case PPC::STWU8:
1034  case PPC::STWUX8:
1035  if (Use.getOperandNo() != 0)
1036  return 0;
1037  MaxTruncation = std::max(MaxTruncation, 32u);
1038  continue;
1039  case PPC::STH8:
1040  case PPC::STHX8:
1041  case PPC::STHU8:
1042  case PPC::STHUX8:
1043  if (Use.getOperandNo() != 0)
1044  return 0;
1045  MaxTruncation = std::max(MaxTruncation, 16u);
1046  continue;
1047  case PPC::STB8:
1048  case PPC::STBX8:
1049  case PPC::STBU8:
1050  case PPC::STBUX8:
1051  if (Use.getOperandNo() != 0)
1052  return 0;
1053  MaxTruncation = std::max(MaxTruncation, 8u);
1054  continue;
1055  }
1056  }
1057  return MaxTruncation;
1058 }
1059 
1060 // Select a 64-bit constant.
1061 static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
1062  SDLoc dl(N);
1063 
1064  // Get 64 bit value.
1065  int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
1066  if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1067  uint64_t SextImm = SignExtend64(Imm, MinSize);
1068  SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1069  if (isInt<16>(SextImm))
1070  return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1071  }
1072  return selectI64Imm(CurDAG, dl, Imm);
1073 }
1074 
1075 namespace {
1076 
1077 class BitPermutationSelector {
1078  struct ValueBit {
1079  SDValue V;
1080 
1081  // The bit number in the value, using a convention where bit 0 is the
1082  // lowest-order bit.
1083  unsigned Idx;
1084 
1085  // ConstZero means a bit we need to mask off.
1086  // Variable is a bit comes from an input variable.
1087  // VariableKnownToBeZero is also a bit comes from an input variable,
1088  // but it is known to be already zero. So we do not need to mask them.
1089  enum Kind {
1090  ConstZero,
1091  Variable,
1092  VariableKnownToBeZero
1093  } K;
1094 
1095  ValueBit(SDValue V, unsigned I, Kind K = Variable)
1096  : V(V), Idx(I), K(K) {}
1097  ValueBit(Kind K = Variable)
1098  : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
1099 
1100  bool isZero() const {
1101  return K == ConstZero || K == VariableKnownToBeZero;
1102  }
1103 
1104  bool hasValue() const {
1105  return K == Variable || K == VariableKnownToBeZero;
1106  }
1107 
1108  SDValue getValue() const {
1109  assert(hasValue() && "Cannot get the value of a constant bit");
1110  return V;
1111  }
1112 
1113  unsigned getValueBitIndex() const {
1114  assert(hasValue() && "Cannot get the value bit index of a constant bit");
1115  return Idx;
1116  }
1117  };
1118 
1119  // A bit group has the same underlying value and the same rotate factor.
1120  struct BitGroup {
1121  SDValue V;
1122  unsigned RLAmt;
1123  unsigned StartIdx, EndIdx;
1124 
1125  // This rotation amount assumes that the lower 32 bits of the quantity are
1126  // replicated in the high 32 bits by the rotation operator (which is done
1127  // by rlwinm and friends in 64-bit mode).
1128  bool Repl32;
1129  // Did converting to Repl32 == true change the rotation factor? If it did,
1130  // it decreased it by 32.
1131  bool Repl32CR;
1132  // Was this group coalesced after setting Repl32 to true?
1133  bool Repl32Coalesced;
1134 
1135  BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1136  : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1137  Repl32Coalesced(false) {
1138  LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
1139  << " [" << S << ", " << E << "]\n");
1140  }
1141  };
1142 
1143  // Information on each (Value, RLAmt) pair (like the number of groups
1144  // associated with each) used to choose the lowering method.
1145  struct ValueRotInfo {
1146  SDValue V;
1147  unsigned RLAmt = std::numeric_limits<unsigned>::max();
1148  unsigned NumGroups = 0;
1149  unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1150  bool Repl32 = false;
1151 
1152  ValueRotInfo() = default;
1153 
1154  // For sorting (in reverse order) by NumGroups, and then by
1155  // FirstGroupStartIdx.
1156  bool operator < (const ValueRotInfo &Other) const {
1157  // We need to sort so that the non-Repl32 come first because, when we're
1158  // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1159  // masking operation.
1160  if (Repl32 < Other.Repl32)
1161  return true;
1162  else if (Repl32 > Other.Repl32)
1163  return false;
1164  else if (NumGroups > Other.NumGroups)
1165  return true;
1166  else if (NumGroups < Other.NumGroups)
1167  return false;
1168  else if (RLAmt == 0 && Other.RLAmt != 0)
1169  return true;
1170  else if (RLAmt != 0 && Other.RLAmt == 0)
1171  return false;
1172  else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1173  return true;
1174  return false;
1175  }
1176  };
1177 
1178  using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1179  using ValueBitsMemoizer =
1181  ValueBitsMemoizer Memoizer;
1182 
1183  // Return a pair of bool and a SmallVector pointer to a memoization entry.
1184  // The bool is true if something interesting was deduced, otherwise if we're
1185  // providing only a generic representation of V (or something else likewise
1186  // uninteresting for instruction selection) through the SmallVector.
1187  std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
1188  unsigned NumBits) {
1189  auto &ValueEntry = Memoizer[V];
1190  if (ValueEntry)
1191  return std::make_pair(ValueEntry->first, &ValueEntry->second);
1192  ValueEntry.reset(new ValueBitsMemoizedValue());
1193  bool &Interesting = ValueEntry->first;
1194  SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
1195  Bits.resize(NumBits);
1196 
1197  switch (V.getOpcode()) {
1198  default: break;
1199  case ISD::ROTL:
1200  if (isa<ConstantSDNode>(V.getOperand(1))) {
1201  unsigned RotAmt = V.getConstantOperandVal(1);
1202 
1203  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1204 
1205  for (unsigned i = 0; i < NumBits; ++i)
1206  Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1207 
1208  return std::make_pair(Interesting = true, &Bits);
1209  }
1210  break;
1211  case ISD::SHL:
1212  if (isa<ConstantSDNode>(V.getOperand(1))) {
1213  unsigned ShiftAmt = V.getConstantOperandVal(1);
1214 
1215  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1216 
1217  for (unsigned i = ShiftAmt; i < NumBits; ++i)
1218  Bits[i] = LHSBits[i - ShiftAmt];
1219 
1220  for (unsigned i = 0; i < ShiftAmt; ++i)
1221  Bits[i] = ValueBit(ValueBit::ConstZero);
1222 
1223  return std::make_pair(Interesting = true, &Bits);
1224  }
1225  break;
1226  case ISD::SRL:
1227  if (isa<ConstantSDNode>(V.getOperand(1))) {
1228  unsigned ShiftAmt = V.getConstantOperandVal(1);
1229 
1230  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1231 
1232  for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1233  Bits[i] = LHSBits[i + ShiftAmt];
1234 
1235  for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1236  Bits[i] = ValueBit(ValueBit::ConstZero);
1237 
1238  return std::make_pair(Interesting = true, &Bits);
1239  }
1240  break;
1241  case ISD::AND:
1242  if (isa<ConstantSDNode>(V.getOperand(1))) {
1243  uint64_t Mask = V.getConstantOperandVal(1);
1244 
1245  const SmallVector<ValueBit, 64> *LHSBits;
1246  // Mark this as interesting, only if the LHS was also interesting. This
1247  // prevents the overall procedure from matching a single immediate 'and'
1248  // (which is non-optimal because such an and might be folded with other
1249  // things if we don't select it here).
1250  std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1251 
1252  for (unsigned i = 0; i < NumBits; ++i)
1253  if (((Mask >> i) & 1) == 1)
1254  Bits[i] = (*LHSBits)[i];
1255  else {
1256  // AND instruction masks this bit. If the input is already zero,
1257  // we have nothing to do here. Otherwise, make the bit ConstZero.
1258  if ((*LHSBits)[i].isZero())
1259  Bits[i] = (*LHSBits)[i];
1260  else
1261  Bits[i] = ValueBit(ValueBit::ConstZero);
1262  }
1263 
1264  return std::make_pair(Interesting, &Bits);
1265  }
1266  break;
1267  case ISD::OR: {
1268  const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1269  const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1270 
1271  bool AllDisjoint = true;
1272  SDValue LastVal = SDValue();
1273  unsigned LastIdx = 0;
1274  for (unsigned i = 0; i < NumBits; ++i) {
1275  if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1276  // If both inputs are known to be zero and one is ConstZero and
1277  // another is VariableKnownToBeZero, we can select whichever
1278  // we like. To minimize the number of bit groups, we select
1279  // VariableKnownToBeZero if this bit is the next bit of the same
1280  // input variable from the previous bit. Otherwise, we select
1281  // ConstZero.
1282  if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1283  LHSBits[i].getValueBitIndex() == LastIdx + 1)
1284  Bits[i] = LHSBits[i];
1285  else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1286  RHSBits[i].getValueBitIndex() == LastIdx + 1)
1287  Bits[i] = RHSBits[i];
1288  else
1289  Bits[i] = ValueBit(ValueBit::ConstZero);
1290  }
1291  else if (LHSBits[i].isZero())
1292  Bits[i] = RHSBits[i];
1293  else if (RHSBits[i].isZero())
1294  Bits[i] = LHSBits[i];
1295  else {
1296  AllDisjoint = false;
1297  break;
1298  }
1299  // We remember the value and bit index of this bit.
1300  if (Bits[i].hasValue()) {
1301  LastVal = Bits[i].getValue();
1302  LastIdx = Bits[i].getValueBitIndex();
1303  }
1304  else {
1305  if (LastVal) LastVal = SDValue();
1306  LastIdx = 0;
1307  }
1308  }
1309 
1310  if (!AllDisjoint)
1311  break;
1312 
1313  return std::make_pair(Interesting = true, &Bits);
1314  }
1315  case ISD::ZERO_EXTEND: {
1316  // We support only the case with zero extension from i32 to i64 so far.
1317  if (V.getValueType() != MVT::i64 ||
1318  V.getOperand(0).getValueType() != MVT::i32)
1319  break;
1320 
1321  const SmallVector<ValueBit, 64> *LHSBits;
1322  const unsigned NumOperandBits = 32;
1323  std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1324  NumOperandBits);
1325 
1326  for (unsigned i = 0; i < NumOperandBits; ++i)
1327  Bits[i] = (*LHSBits)[i];
1328 
1329  for (unsigned i = NumOperandBits; i < NumBits; ++i)
1330  Bits[i] = ValueBit(ValueBit::ConstZero);
1331 
1332  return std::make_pair(Interesting, &Bits);
1333  }
1334  case ISD::TRUNCATE: {
1336  EVT ToType = V.getValueType();
1337  // We support only the case with truncate from i64 to i32.
1338  if (FromType != MVT::i64 || ToType != MVT::i32)
1339  break;
1340  const unsigned NumAllBits = FromType.getSizeInBits();
1341  SmallVector<ValueBit, 64> *InBits;
1342  std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
1343  NumAllBits);
1344  const unsigned NumValidBits = ToType.getSizeInBits();
1345 
1346  // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1347  // So, we cannot include this truncate.
1348  bool UseUpper32bit = false;
1349  for (unsigned i = 0; i < NumValidBits; ++i)
1350  if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
1351  UseUpper32bit = true;
1352  break;
1353  }
1354  if (UseUpper32bit)
1355  break;
1356 
1357  for (unsigned i = 0; i < NumValidBits; ++i)
1358  Bits[i] = (*InBits)[i];
1359 
1360  return std::make_pair(Interesting, &Bits);
1361  }
1362  case ISD::AssertZext: {
1363  // For AssertZext, we look through the operand and
1364  // mark the bits known to be zero.
1365  const SmallVector<ValueBit, 64> *LHSBits;
1366  std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1367  NumBits);
1368 
1369  EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
1370  const unsigned NumValidBits = FromType.getSizeInBits();
1371  for (unsigned i = 0; i < NumValidBits; ++i)
1372  Bits[i] = (*LHSBits)[i];
1373 
1374  // These bits are known to be zero.
1375  for (unsigned i = NumValidBits; i < NumBits; ++i)
1376  Bits[i] = ValueBit((*LHSBits)[i].getValue(),
1377  (*LHSBits)[i].getValueBitIndex(),
1378  ValueBit::VariableKnownToBeZero);
1379 
1380  return std::make_pair(Interesting, &Bits);
1381  }
1382  case ISD::LOAD:
1383  LoadSDNode *LD = cast<LoadSDNode>(V);
1384  if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
1385  EVT VT = LD->getMemoryVT();
1386  const unsigned NumValidBits = VT.getSizeInBits();
1387 
1388  for (unsigned i = 0; i < NumValidBits; ++i)
1389  Bits[i] = ValueBit(V, i);
1390 
1391  // These bits are known to be zero.
1392  for (unsigned i = NumValidBits; i < NumBits; ++i)
1393  Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1394 
1395  // Zero-extending load itself cannot be optimized. So, it is not
1396  // interesting by itself though it gives useful information.
1397  return std::make_pair(Interesting = false, &Bits);
1398  }
1399  break;
1400  }
1401 
1402  for (unsigned i = 0; i < NumBits; ++i)
1403  Bits[i] = ValueBit(V, i);
1404 
1405  return std::make_pair(Interesting = false, &Bits);
1406  }
1407 
1408  // For each value (except the constant ones), compute the left-rotate amount
1409  // to get it from its original to final position.
1410  void computeRotationAmounts() {
1411  NeedMask = false;
1412  RLAmt.resize(Bits.size());
1413  for (unsigned i = 0; i < Bits.size(); ++i)
1414  if (Bits[i].hasValue()) {
1415  unsigned VBI = Bits[i].getValueBitIndex();
1416  if (i >= VBI)
1417  RLAmt[i] = i - VBI;
1418  else
1419  RLAmt[i] = Bits.size() - (VBI - i);
1420  } else if (Bits[i].isZero()) {
1421  NeedMask = true;
1422  RLAmt[i] = UINT32_MAX;
1423  } else {
1424  llvm_unreachable("Unknown value bit type");
1425  }
1426  }
1427 
1428  // Collect groups of consecutive bits with the same underlying value and
1429  // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1430  // they break up groups.
1431  void collectBitGroups(bool LateMask) {
1432  BitGroups.clear();
1433 
1434  unsigned LastRLAmt = RLAmt[0];
1435  SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
1436  unsigned LastGroupStartIdx = 0;
1437  bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1438  for (unsigned i = 1; i < Bits.size(); ++i) {
1439  unsigned ThisRLAmt = RLAmt[i];
1440  SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
1441  if (LateMask && !ThisValue) {
1442  ThisValue = LastValue;
1443  ThisRLAmt = LastRLAmt;
1444  // If we're doing late masking, then the first bit group always starts
1445  // at zero (even if the first bits were zero).
1446  if (BitGroups.empty())
1447  LastGroupStartIdx = 0;
1448  }
1449 
1450  // If this bit is known to be zero and the current group is a bit group
1451  // of zeros, we do not need to terminate the current bit group even the
1452  // Value or RLAmt does not match here. Instead, we terminate this group
1453  // when the first non-zero bit appears later.
1454  if (IsGroupOfZeros && Bits[i].isZero())
1455  continue;
1456 
1457  // If this bit has the same underlying value and the same rotate factor as
1458  // the last one, then they're part of the same group.
1459  if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1460  // We cannot continue the current group if this bits is not known to
1461  // be zero in a bit group of zeros.
1462  if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
1463  continue;
1464 
1465  if (LastValue.getNode())
1466  BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1467  i-1));
1468  LastRLAmt = ThisRLAmt;
1469  LastValue = ThisValue;
1470  LastGroupStartIdx = i;
1471  IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1472  }
1473  if (LastValue.getNode())
1474  BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1475  Bits.size()-1));
1476 
1477  if (BitGroups.empty())
1478  return;
1479 
1480  // We might be able to combine the first and last groups.
1481  if (BitGroups.size() > 1) {
1482  // If the first and last groups are the same, then remove the first group
1483  // in favor of the last group, making the ending index of the last group
1484  // equal to the ending index of the to-be-removed first group.
1485  if (BitGroups[0].StartIdx == 0 &&
1486  BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1487  BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1488  BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1489  LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1490  BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1491  BitGroups.erase(BitGroups.begin());
1492  }
1493  }
1494  }
1495 
1496  // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1497  // associated with each. If the number of groups are same, we prefer a group
1498  // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1499  // instruction. If there is a degeneracy, pick the one that occurs
1500  // first (in the final value).
1501  void collectValueRotInfo() {
1502  ValueRots.clear();
1503 
1504  for (auto &BG : BitGroups) {
1505  unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1506  ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1507  VRI.V = BG.V;
1508  VRI.RLAmt = BG.RLAmt;
1509  VRI.Repl32 = BG.Repl32;
1510  VRI.NumGroups += 1;
1511  VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1512  }
1513 
1514  // Now that we've collected the various ValueRotInfo instances, we need to
1515  // sort them.
1516  ValueRotsVec.clear();
1517  for (auto &I : ValueRots) {
1518  ValueRotsVec.push_back(I.second);
1519  }
1520  llvm::sort(ValueRotsVec);
1521  }
1522 
1523  // In 64-bit mode, rlwinm and friends have a rotation operator that
1524  // replicates the low-order 32 bits into the high-order 32-bits. The mask
1525  // indices of these instructions can only be in the lower 32 bits, so they
1526  // can only represent some 64-bit bit groups. However, when they can be used,
1527  // the 32-bit replication can be used to represent, as a single bit group,
1528  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1529  // groups when possible. Returns true if any of the bit groups were
1530  // converted.
1531  void assignRepl32BitGroups() {
1532  // If we have bits like this:
1533  //
1534  // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1535  // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1536  // Groups: | RLAmt = 8 | RLAmt = 40 |
1537  //
1538  // But, making use of a 32-bit operation that replicates the low-order 32
1539  // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1540  // of 8.
1541 
1542  auto IsAllLow32 = [this](BitGroup & BG) {
1543  if (BG.StartIdx <= BG.EndIdx) {
1544  for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1545  if (!Bits[i].hasValue())
1546  continue;
1547  if (Bits[i].getValueBitIndex() >= 32)
1548  return false;
1549  }
1550  } else {
1551  for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1552  if (!Bits[i].hasValue())
1553  continue;
1554  if (Bits[i].getValueBitIndex() >= 32)
1555  return false;
1556  }
1557  for (unsigned i = 0; i <= BG.EndIdx; ++i) {
1558  if (!Bits[i].hasValue())
1559  continue;
1560  if (Bits[i].getValueBitIndex() >= 32)
1561  return false;
1562  }
1563  }
1564 
1565  return true;
1566  };
1567 
1568  for (auto &BG : BitGroups) {
1569  // If this bit group has RLAmt of 0 and will not be merged with
1570  // another bit group, we don't benefit from Repl32. We don't mark
1571  // such group to give more freedom for later instruction selection.
1572  if (BG.RLAmt == 0) {
1573  auto PotentiallyMerged = [this](BitGroup & BG) {
1574  for (auto &BG2 : BitGroups)
1575  if (&BG != &BG2 && BG.V == BG2.V &&
1576  (BG2.RLAmt == 0 || BG2.RLAmt == 32))
1577  return true;
1578  return false;
1579  };
1580  if (!PotentiallyMerged(BG))
1581  continue;
1582  }
1583  if (BG.StartIdx < 32 && BG.EndIdx < 32) {
1584  if (IsAllLow32(BG)) {
1585  if (BG.RLAmt >= 32) {
1586  BG.RLAmt -= 32;
1587  BG.Repl32CR = true;
1588  }
1589 
1590  BG.Repl32 = true;
1591 
1592  LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
1593  << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
1594  << BG.StartIdx << ", " << BG.EndIdx << "]\n");
1595  }
1596  }
1597  }
1598 
1599  // Now walk through the bit groups, consolidating where possible.
1600  for (auto I = BitGroups.begin(); I != BitGroups.end();) {
1601  // We might want to remove this bit group by merging it with the previous
1602  // group (which might be the ending group).
1603  auto IP = (I == BitGroups.begin()) ?
1604  std::prev(BitGroups.end()) : std::prev(I);
1605  if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
1606  I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
1607 
1608  LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
1609  << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
1610  << I->StartIdx << ", " << I->EndIdx
1611  << "] with group with range [" << IP->StartIdx << ", "
1612  << IP->EndIdx << "]\n");
1613 
1614  IP->EndIdx = I->EndIdx;
1615  IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
1616  IP->Repl32Coalesced = true;
1617  I = BitGroups.erase(I);
1618  continue;
1619  } else {
1620  // There is a special case worth handling: If there is a single group
1621  // covering the entire upper 32 bits, and it can be merged with both
1622  // the next and previous groups (which might be the same group), then
1623  // do so. If it is the same group (so there will be only one group in
1624  // total), then we need to reverse the order of the range so that it
1625  // covers the entire 64 bits.
1626  if (I->StartIdx == 32 && I->EndIdx == 63) {
1627  assert(std::next(I) == BitGroups.end() &&
1628  "bit group ends at index 63 but there is another?");
1629  auto IN = BitGroups.begin();
1630 
1631  if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
1632  (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
1633  IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
1634  IsAllLow32(*I)) {
1635 
1636  LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
1637  << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
1638  << ", " << I->EndIdx
1639  << "] with 32-bit replicated groups with ranges ["
1640  << IP->StartIdx << ", " << IP->EndIdx << "] and ["
1641  << IN->StartIdx << ", " << IN->EndIdx << "]\n");
1642 
1643  if (IP == IN) {
1644  // There is only one other group; change it to cover the whole
1645  // range (backward, so that it can still be Repl32 but cover the
1646  // whole 64-bit range).
1647  IP->StartIdx = 31;
1648  IP->EndIdx = 30;
1649  IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
1650  IP->Repl32Coalesced = true;
1651  I = BitGroups.erase(I);
1652  } else {
1653  // There are two separate groups, one before this group and one
1654  // after us (at the beginning). We're going to remove this group,
1655  // but also the group at the very beginning.
1656  IP->EndIdx = IN->EndIdx;
1657  IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
1658  IP->Repl32Coalesced = true;
1659  I = BitGroups.erase(I);
1660  BitGroups.erase(BitGroups.begin());
1661  }
1662 
1663  // This must be the last group in the vector (and we might have
1664  // just invalidated the iterator above), so break here.
1665  break;
1666  }
1667  }
1668  }
1669 
1670  ++I;
1671  }
1672  }
1673 
1674  SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
1675  return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1676  }
1677 
1678  uint64_t getZerosMask() {
1679  uint64_t Mask = 0;
1680  for (unsigned i = 0; i < Bits.size(); ++i) {
1681  if (Bits[i].hasValue())
1682  continue;
1683  Mask |= (UINT64_C(1) << i);
1684  }
1685 
1686  return ~Mask;
1687  }
1688 
1689  // This method extends an input value to 64 bit if input is 32-bit integer.
1690  // While selecting instructions in BitPermutationSelector in 64-bit mode,
1691  // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1692  // In such case, we extend it to 64 bit to be consistent with other values.
1693  SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
1694  if (V.getValueSizeInBits() == 64)
1695  return V;
1696 
1697  assert(V.getValueSizeInBits() == 32);
1698  SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1699  SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
1700  MVT::i64), 0);
1701  SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
1702  MVT::i64, ImDef, V,
1703  SubRegIdx), 0);
1704  return ExtVal;
1705  }
1706 
1707  SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
1708  if (V.getValueSizeInBits() == 32)
1709  return V;
1710 
1711  assert(V.getValueSizeInBits() == 64);
1712  SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1713  SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
1714  MVT::i32, V, SubRegIdx), 0);
1715  return SubVal;
1716  }
1717 
1718  // Depending on the number of groups for a particular value, it might be
1719  // better to rotate, mask explicitly (using andi/andis), and then or the
1720  // result. Select this part of the result first.
1721  void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
1723  return;
1724 
1725  for (ValueRotInfo &VRI : ValueRotsVec) {
1726  unsigned Mask = 0;
1727  for (unsigned i = 0; i < Bits.size(); ++i) {
1728  if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
1729  continue;
1730  if (RLAmt[i] != VRI.RLAmt)
1731  continue;
1732  Mask |= (1u << i);
1733  }
1734 
1735  // Compute the masks for andi/andis that would be necessary.
1736  unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1737  assert((ANDIMask != 0 || ANDISMask != 0) &&
1738  "No set bits in mask for value bit groups");
1739  bool NeedsRotate = VRI.RLAmt != 0;
1740 
1741  // We're trying to minimize the number of instructions. If we have one
1742  // group, using one of andi/andis can break even. If we have three
1743  // groups, we can use both andi and andis and break even (to use both
1744  // andi and andis we also need to or the results together). We need four
1745  // groups if we also need to rotate. To use andi/andis we need to do more
1746  // than break even because rotate-and-mask instructions tend to be easier
1747  // to schedule.
1748 
1749  // FIXME: We've biased here against using andi/andis, which is right for
1750  // POWER cores, but not optimal everywhere. For example, on the A2,
1751  // andi/andis have single-cycle latency whereas the rotate-and-mask
1752  // instructions take two cycles, and it would be better to bias toward
1753  // andi/andis in break-even cases.
1754 
1755  unsigned NumAndInsts = (unsigned) NeedsRotate +
1756  (unsigned) (ANDIMask != 0) +
1757  (unsigned) (ANDISMask != 0) +
1758  (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
1759  (unsigned) (bool) Res;
1760 
1761  LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
1762  << " RL: " << VRI.RLAmt << ":"
1763  << "\n\t\t\tisel using masking: " << NumAndInsts
1764  << " using rotates: " << VRI.NumGroups << "\n");
1765 
1766  if (NumAndInsts >= VRI.NumGroups)
1767  continue;
1768 
1769  LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1770 
1771  if (InstCnt) *InstCnt += NumAndInsts;
1772 
1773  SDValue VRot;
1774  if (VRI.RLAmt) {
1775  SDValue Ops[] =
1776  { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
1777  getI32Imm(0, dl), getI32Imm(31, dl) };
1778  VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
1779  Ops), 0);
1780  } else {
1781  VRot = TruncateToInt32(VRI.V, dl);
1782  }
1783 
1784  SDValue ANDIVal, ANDISVal;
1785  if (ANDIMask != 0)
1786  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1787  VRot, getI32Imm(ANDIMask, dl)), 0);
1788  if (ANDISMask != 0)
1789  ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1790  VRot, getI32Imm(ANDISMask, dl)), 0);
1791 
1792  SDValue TotalVal;
1793  if (!ANDIVal)
1794  TotalVal = ANDISVal;
1795  else if (!ANDISVal)
1796  TotalVal = ANDIVal;
1797  else
1798  TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1799  ANDIVal, ANDISVal), 0);
1800 
1801  if (!Res)
1802  Res = TotalVal;
1803  else
1804  Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1805  Res, TotalVal), 0);
1806 
1807  // Now, remove all groups with this underlying value and rotation
1808  // factor.
1809  eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1810  return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1811  });
1812  }
1813  }
1814 
1815  // Instruction selection for the 32-bit case.
1816  SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
1817  SDLoc dl(N);
1818  SDValue Res;
1819 
1820  if (InstCnt) *InstCnt = 0;
1821 
1822  // Take care of cases that should use andi/andis first.
1823  SelectAndParts32(dl, Res, InstCnt);
1824 
1825  // If we've not yet selected a 'starting' instruction, and we have no zeros
1826  // to fill in, select the (Value, RLAmt) with the highest priority (largest
1827  // number of groups), and start with this rotated value.
1828  if ((!NeedMask || LateMask) && !Res) {
1829  ValueRotInfo &VRI = ValueRotsVec[0];
1830  if (VRI.RLAmt) {
1831  if (InstCnt) *InstCnt += 1;
1832  SDValue Ops[] =
1833  { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
1834  getI32Imm(0, dl), getI32Imm(31, dl) };
1835  Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
1836  0);
1837  } else {
1838  Res = TruncateToInt32(VRI.V, dl);
1839  }
1840 
1841  // Now, remove all groups with this underlying value and rotation factor.
1842  eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1843  return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1844  });
1845  }
1846 
1847  if (InstCnt) *InstCnt += BitGroups.size();
1848 
1849  // Insert the other groups (one at a time).
1850  for (auto &BG : BitGroups) {
1851  if (!Res) {
1852  SDValue Ops[] =
1853  { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
1854  getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1855  getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1856  Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
1857  } else {
1858  SDValue Ops[] =
1859  { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
1860  getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1861  getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1862  Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
1863  }
1864  }
1865 
1866  if (LateMask) {
1867  unsigned Mask = (unsigned) getZerosMask();
1868 
1869  unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1870  assert((ANDIMask != 0 || ANDISMask != 0) &&
1871  "No set bits in zeros mask?");
1872 
1873  if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
1874  (unsigned) (ANDISMask != 0) +
1875  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1876 
1877  SDValue ANDIVal, ANDISVal;
1878  if (ANDIMask != 0)
1879  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1880  Res, getI32Imm(ANDIMask, dl)), 0);
1881  if (ANDISMask != 0)
1882  ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1883  Res, getI32Imm(ANDISMask, dl)), 0);
1884 
1885  if (!ANDIVal)
1886  Res = ANDISVal;
1887  else if (!ANDISVal)
1888  Res = ANDIVal;
1889  else
1890  Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1891  ANDIVal, ANDISVal), 0);
1892  }
1893 
1894  return Res.getNode();
1895  }
1896 
// Number of instructions (1 or 2) needed for a 64-bit rotate-and-mask, or a
// rotate-and-insert when IsIns is true, covering bits [MaskStart, MaskEnd]
// with rotation RLAmt. A Repl32 request always counts as a single instruction.
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  if (Repl32)
    return 1;

  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  const unsigned InstFirstBit = 63 - MaskEnd;
  const unsigned InstLastBit = 63 - MaskStart;

  // A mask touching either end of the register is a single instruction, but
  // only when this is not an insert.
  const bool TouchesAnEnd = InstLastBit == 63 || InstFirstBit == 0;
  // A mask whose last bit lines up with the rotation is a single instruction.
  const bool EndMatchesRotation = InstLastBit == 63 - RLAmt;

  return ((!IsIns && TouchesAnEnd) || EndMatchesRotation) ? 1 : 2;
}
1914 
1915  // For 64-bit values, not all combinations of rotates and masks are
1916  // available. Produce one if it is available.
1917  SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
1918  bool Repl32, unsigned MaskStart, unsigned MaskEnd,
1919  unsigned *InstCnt = nullptr) {
1920  // In the notation used by the instructions, 'start' and 'end' are reversed
1921  // because bits are counted from high to low order.
1922  unsigned InstMaskStart = 64 - MaskEnd - 1,
1923  InstMaskEnd = 64 - MaskStart - 1;
1924 
1925  if (InstCnt) *InstCnt += 1;
1926 
1927  if (Repl32) {
1928  // This rotation amount assumes that the lower 32 bits of the quantity
1929  // are replicated in the high 32 bits by the rotation operator (which is
1930  // done by rlwinm and friends).
1931  assert(InstMaskStart >= 32 && "Mask cannot start out of range");
1932  assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
1933  SDValue Ops[] =
1934  { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1935  getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
1936  return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
1937  Ops), 0);
1938  }
1939 
1940  if (InstMaskEnd == 63) {
1941  SDValue Ops[] =
1942  { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1943  getI32Imm(InstMaskStart, dl) };
1944  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
1945  }
1946 
1947  if (InstMaskStart == 0) {
1948  SDValue Ops[] =
1949  { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1950  getI32Imm(InstMaskEnd, dl) };
1951  return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
1952  }
1953 
1954  if (InstMaskEnd == 63 - RLAmt) {
1955  SDValue Ops[] =
1956  { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1957  getI32Imm(InstMaskStart, dl) };
1958  return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
1959  }
1960 
1961  // We cannot do this with a single instruction, so we'll use two. The
1962  // problem is that we're not free to choose both a rotation amount and mask
1963  // start and end independently. We can choose an arbitrary mask start and
1964  // end, but then the rotation amount is fixed. Rotation, however, can be
1965  // inverted, and so by applying an "inverse" rotation first, we can get the
1966  // desired result.
1967  if (InstCnt) *InstCnt += 1;
1968 
1969  // The rotation mask for the second instruction must be MaskStart.
1970  unsigned RLAmt2 = MaskStart;
1971  // The first instruction must rotate V so that the overall rotation amount
1972  // is RLAmt.
1973  unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
1974  if (RLAmt1)
1975  V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
1976  return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
1977  }
1978 
1979  // For 64-bit values, not all combinations of rotates and masks are
1980  // available. Produce a rotate-mask-and-insert if one is available.
1981  SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
1982  unsigned RLAmt, bool Repl32, unsigned MaskStart,
1983  unsigned MaskEnd, unsigned *InstCnt = nullptr) {
1984  // In the notation used by the instructions, 'start' and 'end' are reversed
1985  // because bits are counted from high to low order.
1986  unsigned InstMaskStart = 64 - MaskEnd - 1,
1987  InstMaskEnd = 64 - MaskStart - 1;
1988 
1989  if (InstCnt) *InstCnt += 1;
1990 
1991  if (Repl32) {
1992  // This rotation amount assumes that the lower 32 bits of the quantity
1993  // are replicated in the high 32 bits by the rotation operator (which is
1994  // done by rlwinm and friends).
1995  assert(InstMaskStart >= 32 && "Mask cannot start out of range");
1996  assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
1997  SDValue Ops[] =
1998  { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1999  getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2000  return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
2001  Ops), 0);
2002  }
2003 
2004  if (InstMaskEnd == 63 - RLAmt) {
2005  SDValue Ops[] =
2006  { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2007  getI32Imm(InstMaskStart, dl) };
2008  return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
2009  }
2010 
2011  // We cannot do this with a single instruction, so we'll use two. The
2012  // problem is that we're not free to choose both a rotation amount and mask
2013  // start and end independently. We can choose an arbitrary mask start and
2014  // end, but then the rotation amount is fixed. Rotation, however, can be
2015  // inverted, and so by applying an "inverse" rotation first, we can get the
2016  // desired result.
2017  if (InstCnt) *InstCnt += 1;
2018 
2019  // The rotation mask for the second instruction must be MaskStart.
2020  unsigned RLAmt2 = MaskStart;
2021  // The first instruction must rotate V so that the overall rotation amount
2022  // is RLAmt.
2023  unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2024  if (RLAmt1)
2025  V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2026  return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
2027  }
2028 
2029  void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2031  return;
2032 
2033  // The idea here is the same as in the 32-bit version, but with additional
2034  // complications from the fact that Repl32 might be true. Because we
2035  // aggressively convert bit groups to Repl32 form (which, for small
2036  // rotation factors, involves no other change), and then coalesce, it might
2037  // be the case that a single 64-bit masking operation could handle both
2038  // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2039  // form allowed coalescing, then we must use a 32-bit rotaton in order to
2040  // completely capture the new combined bit group.
2041 
2042  for (ValueRotInfo &VRI : ValueRotsVec) {
2043  uint64_t Mask = 0;
2044 
2045  // We need to add to the mask all bits from the associated bit groups.
2046  // If Repl32 is false, we need to add bits from bit groups that have
2047  // Repl32 true, but are trivially convertable to Repl32 false. Such a
2048  // group is trivially convertable if it overlaps only with the lower 32
2049  // bits, and the group has not been coalesced.
2050  auto MatchingBG = [VRI](const BitGroup &BG) {
2051  if (VRI.V != BG.V)
2052  return false;
2053 
2054  unsigned EffRLAmt = BG.RLAmt;
2055  if (!VRI.Repl32 && BG.Repl32) {
2056  if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
2057  !BG.Repl32Coalesced) {
2058  if (BG.Repl32CR)
2059  EffRLAmt += 32;
2060  } else {
2061  return false;
2062  }
2063  } else if (VRI.Repl32 != BG.Repl32) {
2064  return false;
2065  }
2066 
2067  return VRI.RLAmt == EffRLAmt;
2068  };
2069 
2070  for (auto &BG : BitGroups) {
2071  if (!MatchingBG(BG))
2072  continue;
2073 
2074  if (BG.StartIdx <= BG.EndIdx) {
2075  for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
2076  Mask |= (UINT64_C(1) << i);
2077  } else {
2078  for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
2079  Mask |= (UINT64_C(1) << i);
2080  for (unsigned i = 0; i <= BG.EndIdx; ++i)
2081  Mask |= (UINT64_C(1) << i);
2082  }
2083  }
2084 
2085  // We can use the 32-bit andi/andis technique if the mask does not
2086  // require any higher-order bits. This can save an instruction compared
2087  // to always using the general 64-bit technique.
2088  bool Use32BitInsts = isUInt<32>(Mask);
2089  // Compute the masks for andi/andis that would be necessary.
2090  unsigned ANDIMask = (Mask & UINT16_MAX),
2091  ANDISMask = (Mask >> 16) & UINT16_MAX;
2092 
2093  bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
2094 
2095  unsigned NumAndInsts = (unsigned) NeedsRotate +
2096  (unsigned) (bool) Res;
2097  if (Use32BitInsts)
2098  NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
2099  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2100  else
2101  NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1;
2102 
2103  unsigned NumRLInsts = 0;
2104  bool FirstBG = true;
2105  bool MoreBG = false;
2106  for (auto &BG : BitGroups) {
2107  if (!MatchingBG(BG)) {
2108  MoreBG = true;
2109  continue;
2110  }
2111  NumRLInsts +=
2112  SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
2113  !FirstBG);
2114  FirstBG = false;
2115  }
2116 
2117  LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2118  << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
2119  << "\n\t\t\tisel using masking: " << NumAndInsts
2120  << " using rotates: " << NumRLInsts << "\n");
2121 
2122  // When we'd use andi/andis, we bias toward using the rotates (andi only
2123  // has a record form, and is cracked on POWER cores). However, when using
2124  // general 64-bit constant formation, bias toward the constant form,
2125  // because that exposes more opportunities for CSE.
2126  if (NumAndInsts > NumRLInsts)
2127  continue;
2128  // When merging multiple bit groups, instruction or is used.
2129  // But when rotate is used, rldimi can inert the rotated value into any
2130  // register, so instruction or can be avoided.
2131  if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
2132  continue;
2133 
2134  LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2135 
2136  if (InstCnt) *InstCnt += NumAndInsts;
2137 
2138  SDValue VRot;
2139  // We actually need to generate a rotation if we have a non-zero rotation
2140  // factor or, in the Repl32 case, if we care about any of the
2141  // higher-order replicated bits. In the latter case, we generate a mask
2142  // backward so that it actually includes the entire 64 bits.
2143  if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
2144  VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2145  VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
2146  else
2147  VRot = VRI.V;
2148 
2149  SDValue TotalVal;
2150  if (Use32BitInsts) {
2151  assert((ANDIMask != 0 || ANDISMask != 0) &&
2152  "No set bits in mask when using 32-bit ands for 64-bit value");
2153 
2154  SDValue ANDIVal, ANDISVal;
2155  if (ANDIMask != 0)
2156  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
2157  ExtendToInt64(VRot, dl),
2158  getI32Imm(ANDIMask, dl)),
2159  0);
2160  if (ANDISMask != 0)
2161  ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
2162  ExtendToInt64(VRot, dl),
2163  getI32Imm(ANDISMask, dl)),
2164  0);
2165 
2166  if (!ANDIVal)
2167  TotalVal = ANDISVal;
2168  else if (!ANDISVal)
2169  TotalVal = ANDIVal;
2170  else
2171  TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2172  ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2173  } else {
2174  TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2175  TotalVal =
2176  SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2177  ExtendToInt64(VRot, dl), TotalVal),
2178  0);
2179  }
2180 
2181  if (!Res)
2182  Res = TotalVal;
2183  else
2184  Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2185  ExtendToInt64(Res, dl), TotalVal),
2186  0);
2187 
2188  // Now, remove all groups with this underlying value and rotation
2189  // factor.
2190  eraseMatchingBitGroups(MatchingBG);
2191  }
2192  }
2193 
2194  // Instruction selection for the 64-bit case.
2195  SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
2196  SDLoc dl(N);
2197  SDValue Res;
2198 
2199  if (InstCnt) *InstCnt = 0;
2200 
2201  // Take care of cases that should use andi/andis first.
2202  SelectAndParts64(dl, Res, InstCnt);
2203 
2204  // If we've not yet selected a 'starting' instruction, and we have no zeros
2205  // to fill in, select the (Value, RLAmt) with the highest priority (largest
2206  // number of groups), and start with this rotated value.
2207  if ((!NeedMask || LateMask) && !Res) {
2208  // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2209  // groups will come first, and so the VRI representing the largest number
2210  // of groups might not be first (it might be the first Repl32 groups).
2211  unsigned MaxGroupsIdx = 0;
2212  if (!ValueRotsVec[0].Repl32) {
2213  for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
2214  if (ValueRotsVec[i].Repl32) {
2215  if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
2216  MaxGroupsIdx = i;
2217  break;
2218  }
2219  }
2220 
2221  ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
2222  bool NeedsRotate = false;
2223  if (VRI.RLAmt) {
2224  NeedsRotate = true;
2225  } else if (VRI.Repl32) {
2226  for (auto &BG : BitGroups) {
2227  if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
2228  BG.Repl32 != VRI.Repl32)
2229  continue;
2230 
2231  // We don't need a rotate if the bit group is confined to the lower
2232  // 32 bits.
2233  if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
2234  continue;
2235 
2236  NeedsRotate = true;
2237  break;
2238  }
2239  }
2240 
2241  if (NeedsRotate)
2242  Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2243  VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
2244  InstCnt);
2245  else
2246  Res = VRI.V;
2247 
2248  // Now, remove all groups with this underlying value and rotation factor.
2249  if (Res)
2250  eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2251  return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
2252  BG.Repl32 == VRI.Repl32;
2253  });
2254  }
2255 
2256  // Because 64-bit rotates are more flexible than inserts, we might have a
2257  // preference regarding which one we do first (to save one instruction).
2258  if (!Res)
2259  for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
2260  if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2261  false) <
2262  SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2263  true)) {
2264  if (I != BitGroups.begin()) {
2265  BitGroup BG = *I;
2266  BitGroups.erase(I);
2267  BitGroups.insert(BitGroups.begin(), BG);
2268  }
2269 
2270  break;
2271  }
2272  }
2273 
2274  // Insert the other groups (one at a time).
2275  for (auto &BG : BitGroups) {
2276  if (!Res)
2277  Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
2278  BG.EndIdx, InstCnt);
2279  else
2280  Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
2281  BG.StartIdx, BG.EndIdx, InstCnt);
2282  }
2283 
2284  if (LateMask) {
2285  uint64_t Mask = getZerosMask();
2286 
2287  // We can use the 32-bit andi/andis technique if the mask does not
2288  // require any higher-order bits. This can save an instruction compared
2289  // to always using the general 64-bit technique.
2290  bool Use32BitInsts = isUInt<32>(Mask);
2291  // Compute the masks for andi/andis that would be necessary.
2292  unsigned ANDIMask = (Mask & UINT16_MAX),
2293  ANDISMask = (Mask >> 16) & UINT16_MAX;
2294 
2295  if (Use32BitInsts) {
2296  assert((ANDIMask != 0 || ANDISMask != 0) &&
2297  "No set bits in mask when using 32-bit ands for 64-bit value");
2298 
2299  if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2300  (unsigned) (ANDISMask != 0) +
2301  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2302 
2303  SDValue ANDIVal, ANDISVal;
2304  if (ANDIMask != 0)
2305  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
2306  ExtendToInt64(Res, dl), getI32Imm(ANDIMask, dl)), 0);
2307  if (ANDISMask != 0)
2308  ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
2309  ExtendToInt64(Res, dl), getI32Imm(ANDISMask, dl)), 0);
2310 
2311  if (!ANDIVal)
2312  Res = ANDISVal;
2313  else if (!ANDISVal)
2314  Res = ANDIVal;
2315  else
2316  Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2317  ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2318  } else {
2319  if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1;
2320 
2321  SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2322  Res =
2323  SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2324  ExtendToInt64(Res, dl), MaskVal), 0);
2325  }
2326  }
2327 
2328  return Res.getNode();
2329  }
2330 
2331  SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2332  // Fill in BitGroups.
2333  collectBitGroups(LateMask);
2334  if (BitGroups.empty())
2335  return nullptr;
2336 
2337  // For 64-bit values, figure out when we can use 32-bit instructions.
2338  if (Bits.size() == 64)
2339  assignRepl32BitGroups();
2340 
2341  // Fill in ValueRotsVec.
2342  collectValueRotInfo();
2343 
2344  if (Bits.size() == 32) {
2345  return Select32(N, LateMask, InstCnt);
2346  } else {
2347  assert(Bits.size() == 64 && "Not 64 bits here?");
2348  return Select64(N, LateMask, InstCnt);
2349  }
2350 
2351  return nullptr;
2352  }
2353 
2354  void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2355  BitGroups.erase(remove_if(BitGroups, F), BitGroups.end());
2356  }
2357 
2359 
2360  bool NeedMask;
2362 
2363  SmallVector<BitGroup, 16> BitGroups;
2364 
2365  DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
2366  SmallVector<ValueRotInfo, 16> ValueRotsVec;
2367 
2368  SelectionDAG *CurDAG;
2369 
2370 public:
2371  BitPermutationSelector(SelectionDAG *DAG)
2372  : CurDAG(DAG) {}
2373 
2374  // Here we try to match complex bit permutations into a set of
2375  // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2376  // known to produce optimial code for common cases (like i32 byte swapping).
2377  SDNode *Select(SDNode *N) {
2378  Memoizer.clear();
2379  auto Result =
2380  getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
2381  if (!Result.first)
2382  return nullptr;
2383  Bits = std::move(*Result.second);
2384 
2385  LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2386  " selection for: ");
2387  LLVM_DEBUG(N->dump(CurDAG));
2388 
2389  // Fill it RLAmt and set NeedMask.
2390  computeRotationAmounts();
2391 
2392  if (!NeedMask)
2393  return Select(N, false);
2394 
2395  // We currently have two techniques for handling results with zeros: early
2396  // masking (the default) and late masking. Late masking is sometimes more
2397  // efficient, but because the structure of the bit groups is different, it
2398  // is hard to tell without generating both and comparing the results. With
2399  // late masking, we ignore zeros in the resulting value when inserting each
2400  // set of bit groups, and then mask in the zeros at the end. With early
2401  // masking, we only insert the non-zero parts of the result at every step.
2402 
2403  unsigned InstCnt = 0, InstCntLateMask = 0;
2404  LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2405  SDNode *RN = Select(N, false, &InstCnt);
2406  LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2407 
2408  LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2409  SDNode *RNLM = Select(N, true, &InstCntLateMask);
2410  LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2411  << " instructions\n");
2412 
2413  if (InstCnt <= InstCntLateMask) {
2414  LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2415  return RN;
2416  }
2417 
2418  LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2419  return RNLM;
2420  }
2421 };
2422 
2423 class IntegerCompareEliminator {
2424  SelectionDAG *CurDAG;
2425  PPCDAGToDAGISel *S;
2426  // Conversion type for interpreting results of a 32-bit instruction as
2427  // a 64-bit value or vice versa.
2428  enum ExtOrTruncConversion { Ext, Trunc };
2429 
2430  // Modifiers to guide how an ISD::SETCC node's result is to be computed
2431  // in a GPR.
2432  // ZExtOrig - use the original condition code, zero-extend value
2433  // ZExtInvert - invert the condition code, zero-extend value
2434  // SExtOrig - use the original condition code, sign-extend value
2435  // SExtInvert - invert the condition code, sign-extend value
2436  enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2437 
2438  // Comparisons against zero to emit GPR code sequences for. Each of these
2439  // sequences may need to be emitted for two or more equivalent patterns.
2440  // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2441  // matters as well as the extension type: sext (-1/0), zext (1/0).
2442  // GEZExt - (zext (LHS >= 0))
2443  // GESExt - (sext (LHS >= 0))
2444  // LEZExt - (zext (LHS <= 0))
2445  // LESExt - (sext (LHS <= 0))
2446  enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2447 
2448  SDNode *tryEXTEND(SDNode *N);
2449  SDNode *tryLogicOpOfCompares(SDNode *N);
2450  SDValue computeLogicOpInGPR(SDValue LogicOp);
2451  SDValue signExtendInputIfNeeded(SDValue Input);
2452  SDValue zeroExtendInputIfNeeded(SDValue Input);
2453  SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2454  SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2455  ZeroCompare CmpTy);
2456  SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2457  int64_t RHSValue, SDLoc dl);
2458  SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2459  int64_t RHSValue, SDLoc dl);
2460  SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2461  int64_t RHSValue, SDLoc dl);
2462  SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2463  int64_t RHSValue, SDLoc dl);
2464  SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2465 
2466 public:
2467  IntegerCompareEliminator(SelectionDAG *DAG,
2468  PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2469  assert(CurDAG->getTargetLoweringInfo()
2470  .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
2471  "Only expecting to use this on 64 bit targets.");
2472  }
2473  SDNode *Select(SDNode *N) {
2474  if (CmpInGPR == ICGPR_None)
2475  return nullptr;
2476  switch (N->getOpcode()) {
2477  default: break;
2478  case ISD::ZERO_EXTEND:
2479  if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
2480  CmpInGPR == ICGPR_SextI64)
2481  return nullptr;
2483  case ISD::SIGN_EXTEND:
2484  if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
2486  return nullptr;
2487  return tryEXTEND(N);
2488  case ISD::AND:
2489  case ISD::OR:
2490  case ISD::XOR:
2491  return tryLogicOpOfCompares(N);
2492  }
2493  return nullptr;
2494  }
2495 };
2496 
2497 static bool isLogicOp(unsigned Opc) {
2498  return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
2499 }
2500 // The obvious case for wanting to keep the value in a GPR. Namely, the
2501 // result of the comparison is actually needed in a GPR.
2502 SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2503  assert((N->getOpcode() == ISD::ZERO_EXTEND ||
2504  N->getOpcode() == ISD::SIGN_EXTEND) &&
2505  "Expecting a zero/sign extend node!");
2506  SDValue WideRes;
2507  // If we are zero-extending the result of a logical operation on i1
2508  // values, we can keep the values in GPRs.
2509  if (isLogicOp(N->getOperand(0).getOpcode()) &&
2510  N->getOperand(0).getValueType() == MVT::i1 &&
2511  N->getOpcode() == ISD::ZERO_EXTEND)
2512  WideRes = computeLogicOpInGPR(N->getOperand(0));
2513  else if (N->getOperand(0).getOpcode() != ISD::SETCC)
2514  return nullptr;
2515  else
2516  WideRes =
2517  getSETCCInGPR(N->getOperand(0),
2518  N->getOpcode() == ISD::SIGN_EXTEND ?
2519  SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2520 
2521  if (!WideRes)
2522  return nullptr;
2523 
2524  SDLoc dl(N);
2525  bool Input32Bit = WideRes.getValueType() == MVT::i32;
2526  bool Output32Bit = N->getValueType(0) == MVT::i32;
2527 
2528  NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
2529  NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
2530 
2531  SDValue ConvOp = WideRes;
2532  if (Input32Bit != Output32Bit)
2533  ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
2534  ExtOrTruncConversion::Trunc);
2535  return ConvOp.getNode();
2536 }
2537 
2538 // Attempt to perform logical operations on the results of comparisons while
2539 // keeping the values in GPRs. Without doing so, these would end up being
2540 // lowered to CR-logical operations which suffer from significant latency and
2541 // low ILP.
2542 SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
2543  if (N->getValueType(0) != MVT::i1)
2544  return nullptr;
2545  assert(isLogicOp(N->getOpcode()) &&
2546  "Expected a logic operation on setcc results.");
2547  SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
2548  if (!LoweredLogical)
2549  return nullptr;
2550 
2551  SDLoc dl(N);
2552  bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
2553  unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
2554  SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
2555  SDValue LHS = LoweredLogical.getOperand(0);
2556  SDValue RHS = LoweredLogical.getOperand(1);
2557  SDValue WideOp;
2558  SDValue OpToConvToRecForm;
2559 
2560  // Look through any 32-bit to 64-bit implicit extend nodes to find the
2561  // opcode that is input to the XORI.
2562  if (IsBitwiseNegate &&
2563  LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
2564  OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
2565  else if (IsBitwiseNegate)
2566  // If the input to the XORI isn't an extension, that's what we're after.
2567  OpToConvToRecForm = LoweredLogical.getOperand(0);
2568  else
2569  // If this is not an XORI, it is a reg-reg logical op and we can convert
2570  // it to record-form.
2571  OpToConvToRecForm = LoweredLogical;
2572 
2573  // Get the record-form version of the node we're looking to use to get the
2574  // CR result from.
2575  uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
2576  int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
2577 
2578  // Convert the right node to record-form. This is either the logical we're
2579  // looking at or it is the input node to the negation (if we're looking at
2580  // a bitwise negation).
2581  if (NewOpc != -1 && IsBitwiseNegate) {
2582  // The input to the XORI has a record-form. Use it.
2583  assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
2584  "Expected a PPC::XORI8 only for bitwise negation.");
2585  // Emit the record-form instruction.
2586  std::vector<SDValue> Ops;
2587  for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
2588  Ops.push_back(OpToConvToRecForm.getOperand(i));
2589 
2590  WideOp =
2591  SDValue(CurDAG->getMachineNode(NewOpc, dl,
2592  OpToConvToRecForm.getValueType(),
2593  MVT::Glue, Ops), 0);
2594  } else {
2595  assert((NewOpc != -1 || !IsBitwiseNegate) &&
2596  "No record form available for AND8/OR8/XOR8?");
2597  WideOp =
2598  SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl,
2599  MVT::i64, MVT::Glue, LHS, RHS), 0);
2600  }
2601 
2602  // Select this node to a single bit from CR0 set by the record-form node
2603  // just created. For bitwise negation, use the EQ bit which is the equivalent
2604  // of negating the result (i.e. it is a bit set when the result of the
2605  // operation is zero).
2606  SDValue SRIdxVal =
2607  CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
2608  SDValue CRBit =
2609  SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
2610  MVT::i1, CR0Reg, SRIdxVal,
2611  WideOp.getValue(1)), 0);
2612  return CRBit.getNode();
2613 }
2614 
2615 // Lower a logical operation on i1 values into a GPR sequence if possible.
2616 // The result can be kept in a GPR if requested.
2617 // Three types of inputs can be handled:
2618 // - SETCC
2619 // - TRUNCATE
2620 // - Logical operation (AND/OR/XOR)
2621 // There is also a special case that is handled (namely a complement operation
2622 // achieved with xor %a, -1).
2623 SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
2624  assert(isLogicOp(LogicOp.getOpcode()) &&
2625  "Can only handle logic operations here.");
2626  assert(LogicOp.getValueType() == MVT::i1 &&
2627  "Can only handle logic operations on i1 values here.");
2628  SDLoc dl(LogicOp);
2629  SDValue LHS, RHS;
2630 
2631  // Special case: xor %a, -1
2632  bool IsBitwiseNegation = isBitwiseNot(LogicOp);
2633 
2634  // Produces a GPR sequence for each operand of the binary logic operation.
2635  // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
2636  // the value in a GPR and for logic operations, it will recursively produce
2637  // a GPR sequence for the operation.
2638  auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
2639  unsigned OperandOpcode = Operand.getOpcode();
2640  if (OperandOpcode == ISD::SETCC)
2641  return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
2642  else if (OperandOpcode == ISD::TRUNCATE) {
2643  SDValue InputOp = Operand.getOperand(0);
2644  EVT InVT = InputOp.getValueType();
2645  return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
2646  PPC::RLDICL, dl, InVT, InputOp,
2647  S->getI64Imm(0, dl),
2648  S->getI64Imm(63, dl)), 0);
2649  } else if (isLogicOp(OperandOpcode))
2650  return computeLogicOpInGPR(Operand);
2651  return SDValue();
2652  };
2653  LHS = getLogicOperand(LogicOp.getOperand(0));
2654  RHS = getLogicOperand(LogicOp.getOperand(1));
2655 
2656  // If a GPR sequence can't be produced for the LHS we can't proceed.
2657  // Not producing a GPR sequence for the RHS is only a problem if this isn't
2658  // a bitwise negation operation.
2659  if (!LHS || (!RHS && !IsBitwiseNegation))
2660  return SDValue();
2661 
2662  NumLogicOpsOnComparison++;
2663 
2664  // We will use the inputs as 64-bit values.
2665  if (LHS.getValueType() == MVT::i32)
2666  LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
2667  if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
2668  RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
2669 
2670  unsigned NewOpc;
2671  switch (LogicOp.getOpcode()) {
2672  default: llvm_unreachable("Unknown logic operation.");
2673  case ISD::AND: NewOpc = PPC::AND8; break;
2674  case ISD::OR: NewOpc = PPC::OR8; break;
2675  case ISD::XOR: NewOpc = PPC::XOR8; break;
2676  }
2677 
2678  if (IsBitwiseNegation) {
2679  RHS = S->getI64Imm(1, dl);
2680  NewOpc = PPC::XORI8;
2681  }
2682 
2683  return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
2684 
2685 }
2686 
2687 /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
2688 /// Otherwise just reinterpret it as a 64-bit value.
2689 /// Useful when emitting comparison code for 32-bit values without using
2690 /// the compare instruction (which only considers the lower 32-bits).
2691 SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
2692  assert(Input.getValueType() == MVT::i32 &&
2693  "Can only sign-extend 32-bit values here.");
2694  unsigned Opc = Input.getOpcode();
2695 
2696  // The value was sign extended and then truncated to 32-bits. No need to
2697  // sign extend it again.
2698  if (Opc == ISD::TRUNCATE &&
2699  (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
2700  Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
2701  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2702 
2703  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2704  // The input is a sign-extending load. All ppc sign-extending loads
2705  // sign-extend to the full 64-bits.
2706  if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
2707  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2708 
2709  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
2710  // We don't sign-extend constants.
2711  if (InputConst)
2712  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2713 
2714  SDLoc dl(Input);
2715  SignExtensionsAdded++;
2716  return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
2717  MVT::i64, Input), 0);
2718 }
2719 
2720 /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
2721 /// Otherwise just reinterpret it as a 64-bit value.
2722 /// Useful when emitting comparison code for 32-bit values without using
2723 /// the compare instruction (which only considers the lower 32-bits).
2724 SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
2725  assert(Input.getValueType() == MVT::i32 &&
2726  "Can only zero-extend 32-bit values here.");
2727  unsigned Opc = Input.getOpcode();
2728 
2729  // The only condition under which we can omit the actual extend instruction:
2730  // - The value is a positive constant
2731  // - The value comes from a load that isn't a sign-extending load
2732  // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
2733  bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
2734  (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
2735  Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
2736  if (IsTruncateOfZExt)
2737  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2738 
2739  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
2740  if (InputConst && InputConst->getSExtValue() >= 0)
2741  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2742 
2743  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2744  // The input is a load that doesn't sign-extend (it will be zero-extended).
2745  if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
2746  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2747 
2748  // None of the above, need to zero-extend.
2749  SDLoc dl(Input);
2750  ZeroExtensionsAdded++;
2751  return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
2752  S->getI64Imm(0, dl),
2753  S->getI64Imm(32, dl)), 0);
2754 }
2755 
2756 // Handle a 32-bit value in a 64-bit register and vice-versa. These are of
2757 // course not actual zero/sign extensions that will generate machine code,
2758 // they're just a way to reinterpret a 32 bit value in a register as a
2759 // 64 bit value and vice-versa.
2760 SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
2761  ExtOrTruncConversion Conv) {
2762  SDLoc dl(NatWidthRes);
2763 
2764  // For reinterpreting 32-bit values as 64 bit values, we generate
2765  // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
2766  if (Conv == ExtOrTruncConversion::Ext) {
2767  SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
2768  SDValue SubRegIdx =
2769  CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2770  return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
2771  ImDef, NatWidthRes, SubRegIdx), 0);
2772  }
2773 
2774  assert(Conv == ExtOrTruncConversion::Trunc &&
2775  "Unknown convertion between 32 and 64 bit values.");
2776  // For reinterpreting 64-bit values as 32-bit values, we just need to
2777  // EXTRACT_SUBREG (i.e. extract the low word).
2778  SDValue SubRegIdx =
2779  CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2780  return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
2781  NatWidthRes, SubRegIdx), 0);
2782 }
2783 
2784 // Produce a GPR sequence for compound comparisons (<=, >=) against zero.
2785 // Handle both zero-extensions and sign-extensions.
2786 SDValue
2787 IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2788  ZeroCompare CmpTy) {
2789  EVT InVT = LHS.getValueType();
2790  bool Is32Bit = InVT == MVT::i32;
2791  SDValue ToExtend;
2792 
2793  // Produce the value that needs to be either zero or sign extended.
2794  switch (CmpTy) {
2795  case ZeroCompare::GEZExt:
2796  case ZeroCompare::GESExt:
2797  ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
2798  dl, InVT, LHS, LHS), 0);
2799  break;
2800  case ZeroCompare::LEZExt:
2801  case ZeroCompare::LESExt: {
2802  if (Is32Bit) {
2803  // Upper 32 bits cannot be undefined for this sequence.
2804  LHS = signExtendInputIfNeeded(LHS);
2805  SDValue Neg =
2806  SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
2807  ToExtend =
2808  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2809  Neg, S->getI64Imm(1, dl),
2810  S->getI64Imm(63, dl)), 0);
2811  } else {
2812  SDValue Addi =
2813  SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
2814  S->getI64Imm(~0ULL, dl)), 0);
2815  ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2816  Addi, LHS), 0);
2817  }
2818  break;
2819  }
2820  }
2821 
2822  // For 64-bit sequences, the extensions are the same for the GE/LE cases.
2823  if (!Is32Bit &&
2824  (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
2825  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2826  ToExtend, S->getI64Imm(1, dl),
2827  S->getI64Imm(63, dl)), 0);
2828  if (!Is32Bit &&
2829  (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
2830  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
2831  S->getI64Imm(63, dl)), 0);
2832 
2833  assert(Is32Bit && "Should have handled the 32-bit sequences above.");
2834  // For 32-bit sequences, the extensions differ between GE/LE cases.
2835  switch (CmpTy) {
2836  case ZeroCompare::GEZExt: {
2837  SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
2838  S->getI32Imm(31, dl) };
2839  return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2840  ShiftOps), 0);
2841  }
2842  case ZeroCompare::GESExt:
2843  return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
2844  S->getI32Imm(31, dl)), 0);
2845  case ZeroCompare::LEZExt:
2846  return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
2847  S->getI32Imm(1, dl)), 0);
2848  case ZeroCompare::LESExt:
2849  return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
2850  S->getI32Imm(-1, dl)), 0);
2851  }
2852 
2853  // The above case covers all the enumerators so it can't have a default clause
2854  // to avoid compiler warnings.
2855  llvm_unreachable("Unknown zero-comparison type.");
2856 }
2857 
2858 /// Produces a zero-extended result of comparing two 32-bit values according to
2859 /// the passed condition code.
2860 SDValue
2861 IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
2862  ISD::CondCode CC,
2863  int64_t RHSValue, SDLoc dl) {
2864  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
2866  return SDValue();
2867  bool IsRHSZero = RHSValue == 0;
2868  bool IsRHSOne = RHSValue == 1;
2869  bool IsRHSNegOne = RHSValue == -1LL;
2870  switch (CC) {
2871  default: return SDValue();
2872  case ISD::SETEQ: {
2873  // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
2874  // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
2875  SDValue Xor = IsRHSZero ? LHS :
2876  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2877  SDValue Clz =
2878  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
2879  SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
2880  S->getI32Imm(31, dl) };
2881  return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2882  ShiftOps), 0);
2883  }
2884  case ISD::SETNE: {
2885  // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
2886  // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
2887  SDValue Xor = IsRHSZero ? LHS :
2888  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2889  SDValue Clz =
2890  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
2891  SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
2892  S->getI32Imm(31, dl) };
2893  SDValue Shift =
2894  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
2895  return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
2896  S->getI32Imm(1, dl)), 0);
2897  }
2898  case ISD::SETGE: {
2899  // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
2900  // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
2901  if(IsRHSZero)
2902  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
2903 
2904  // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
2905  // by swapping inputs and falling through.
2906  std::swap(LHS, RHS);
2907  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2908  IsRHSZero = RHSConst && RHSConst->isNullValue();
2910  }
2911  case ISD::SETLE: {
2912  if (CmpInGPR == ICGPR_NonExtIn)
2913  return SDValue();
2914  // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
2915  // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
2916  if(IsRHSZero) {
2917  if (CmpInGPR == ICGPR_NonExtIn)
2918  return SDValue();
2919  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
2920  }
2921 
2922  // The upper 32-bits of the register can't be undefined for this sequence.
2923  LHS = signExtendInputIfNeeded(LHS);
2924  RHS = signExtendInputIfNeeded(RHS);
2925  SDValue Sub =
2926  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
2927  SDValue Shift =
2928  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
2929  S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
2930  0);
2931  return
2932  SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
2933  MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
2934  }
2935  case ISD::SETGT: {
2936  // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
2937  // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
2938  // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
2939  // Handle SETLT -1 (which is equivalent to SETGE 0).
2940  if (IsRHSNegOne)
2941  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
2942 
2943  if (IsRHSZero) {
2944  if (CmpInGPR == ICGPR_NonExtIn)
2945  return SDValue();
2946  // The upper 32-bits of the register can't be undefined for this sequence.
2947  LHS = signExtendInputIfNeeded(LHS);
2948  RHS = signExtendInputIfNeeded(RHS);
2949  SDValue Neg =
2950  SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
2951  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2952  Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
2953  }
2954  // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
2955  // (%b < %a) by swapping inputs and falling through.
2956  std::swap(LHS, RHS);
2957  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2958  IsRHSZero = RHSConst && RHSConst->isNullValue();
2959  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
2961  }
2962  case ISD::SETLT: {
2963  // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
2964  // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
2965  // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
2966  // Handle SETLT 1 (which is equivalent to SETLE 0).
2967  if (IsRHSOne) {
2968  if (CmpInGPR == ICGPR_NonExtIn)
2969  return SDValue();
2970  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
2971  }
2972 
2973  if (IsRHSZero) {
2974  SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
2975  S->getI32Imm(31, dl) };
2976  return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2977  ShiftOps), 0);
2978  }
2979 
2980  if (CmpInGPR == ICGPR_NonExtIn)
2981  return SDValue();
2982  // The upper 32-bits of the register can't be undefined for this sequence.
2983  LHS = signExtendInputIfNeeded(LHS);
2984  RHS = signExtendInputIfNeeded(RHS);
2985  SDValue SUBFNode =
2986  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
2987  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2988  SUBFNode, S->getI64Imm(1, dl),
2989  S->getI64Imm(63, dl)), 0);
2990  }
2991  case ISD::SETUGE:
2992  // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
2993  // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
2994  std::swap(LHS, RHS);
2996  case ISD::SETULE: {
2997  if (CmpInGPR == ICGPR_NonExtIn)
2998  return SDValue();
2999  // The upper 32-bits of the register can't be undefined for this sequence.
3000  LHS = zeroExtendInputIfNeeded(LHS);
3001  RHS = zeroExtendInputIfNeeded(RHS);
3002  SDValue Subtract =
3003  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3004  SDValue SrdiNode =
3005  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3006  Subtract, S->getI64Imm(1, dl),
3007  S->getI64Imm(63, dl)), 0);
3008  return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3009  S->getI32Imm(1, dl)), 0);
3010  }
3011  case ISD::SETUGT:
3012  // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3013  // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3014  std::swap(LHS, RHS);
3016  case ISD::SETULT: {
3017  if (CmpInGPR == ICGPR_NonExtIn)
3018  return SDValue();
3019  // The upper 32-bits of the register can't be undefined for this sequence.
3020  LHS = zeroExtendInputIfNeeded(LHS);
3021  RHS = zeroExtendInputIfNeeded(RHS);
3022  SDValue Subtract =
3023  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3024  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3025  Subtract, S->getI64Imm(1, dl),
3026  S->getI64Imm(63, dl)), 0);
3027  }
3028  }
3029 }
3030 
3031 /// Produces a sign-extended result of comparing two 32-bit values according to
3032 /// the passed condition code.
3033 SDValue
3034 IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
3035  ISD::CondCode CC,
3036  int64_t RHSValue, SDLoc dl) {
3037  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
3039  return SDValue();
3040  bool IsRHSZero = RHSValue == 0;
3041  bool IsRHSOne = RHSValue == 1;
3042  bool IsRHSNegOne = RHSValue == -1LL;
3043 
3044  switch (CC) {
3045  default: return SDValue();
3046  case ISD::SETEQ: {
3047  // (sext (setcc %a, %b, seteq)) ->
3048  // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
3049  // (sext (setcc %a, 0, seteq)) ->
3050  // (ashr (shl (ctlz %a), 58), 63)
3051  SDValue CountInput = IsRHSZero ? LHS :
3052  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3053  SDValue Cntlzw =
3054  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
3055  SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
3056  S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3057  SDValue Slwi =
3058  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
3059  return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
3060  }
3061  case ISD::SETNE: {
3062  // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3063  // flip the bit, finally take 2's complement.
3064  // (sext (setcc %a, %b, setne)) ->
3065  // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3066  // Same as above, but the first xor is not needed.
3067  // (sext (setcc %a, 0, setne)) ->
3068  // (neg (xor (lshr (ctlz %a), 5), 1))
3069  SDValue Xor = IsRHSZero ? LHS :
3070  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3071  SDValue Clz =
3072  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3073  SDValue ShiftOps[] =
3074  { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3075  SDValue Shift =
3076  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3077  SDValue Xori =
3078  SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3079  S->getI32Imm(1, dl)), 0);
3080  return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
3081  }
3082  case ISD::SETGE: {
3083  // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3084  // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3085  if (IsRHSZero)
3086  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3087 
3088  // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3089  // by swapping inputs and falling through.
3090  std::swap(LHS, RHS);
3091  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3092  IsRHSZero = RHSConst && RHSConst->isNullValue();
3094  }
3095  case ISD::SETLE: {
3096  if (CmpInGPR == ICGPR_NonExtIn)
3097  return SDValue();
3098  // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1)
3099  // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3100  if (IsRHSZero)
3101  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3102 
3103  // The upper 32-bits of the register can't be undefined for this sequence.
3104  LHS = signExtendInputIfNeeded(LHS);
3105  RHS = signExtendInputIfNeeded(RHS);
3106  SDValue SUBFNode =
3107  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
3108  LHS, RHS), 0);
3109  SDValue Srdi =
3110  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3111  SUBFNode, S->getI64Imm(1, dl),
3112  S->getI64Imm(63, dl)), 0);
3113  return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
3114  S->getI32Imm(-1, dl)), 0);
3115  }
3116  case ISD::SETGT: {
3117  // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3118  // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3119  // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
3120  if (IsRHSNegOne)
3121  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3122  if (IsRHSZero) {
3123  if (CmpInGPR == ICGPR_NonExtIn)
3124  return SDValue();
3125  // The upper 32-bits of the register can't be undefined for this sequence.
3126  LHS = signExtendInputIfNeeded(LHS);
3127  RHS = signExtendInputIfNeeded(RHS);
3128  SDValue Neg =
3129  SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3130  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
3131  S->getI64Imm(63, dl)), 0);
3132  }
3133  // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3134  // (%b < %a) by swapping inputs and falling through.
3135  std::swap(LHS, RHS);
3136  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3137  IsRHSZero = RHSConst && RHSConst->isNullValue();
3138  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3140  }
3141  case ISD::SETLT: {
3142  // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63)
3143  // (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1)
3144  // (sext (setcc %a, 0, setgt)) -> (ashr %a, 31)
3145  if (IsRHSOne) {
3146  if (CmpInGPR == ICGPR_NonExtIn)
3147  return SDValue();
3148  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3149  }
3150  if (IsRHSZero)
3151  return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3152  S->getI32Imm(31, dl)), 0);
3153 
3154  if (CmpInGPR == ICGPR_NonExtIn)
3155  return SDValue();
3156  // The upper 32-bits of the register can't be undefined for this sequence.
3157  LHS = signExtendInputIfNeeded(LHS);
3158  RHS = signExtendInputIfNeeded(RHS);
3159  SDValue SUBFNode =
3160  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3161  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3162  SUBFNode, S->getI64Imm(63, dl)), 0);
3163  }
3164  case ISD::SETUGE:
3165  // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3166  // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3167  std::swap(LHS, RHS);
3169  case ISD::SETULE: {
3170  if (CmpInGPR == ICGPR_NonExtIn)
3171  return SDValue();
3172  // The upper 32-bits of the register can't be undefined for this sequence.
3173  LHS = zeroExtendInputIfNeeded(LHS);
3174  RHS = zeroExtendInputIfNeeded(RHS);
3175  SDValue Subtract =
3176  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3177  SDValue Shift =
3178  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3179  S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3180  0);
3181  return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3182  S->getI32Imm(-1, dl)), 0);
3183  }
3184  case ISD::SETUGT:
3185  // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3186  // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63)
3187  std::swap(LHS, RHS);
3189  case ISD::SETULT: {
3190  if (CmpInGPR == ICGPR_NonExtIn)
3191  return SDValue();
3192  // The upper 32-bits of the register can't be undefined for this sequence.
3193  LHS = zeroExtendInputIfNeeded(LHS);
3194  RHS = zeroExtendInputIfNeeded(RHS);
3195  SDValue Subtract =
3196  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3197  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3198  Subtract, S->getI64Imm(63, dl)), 0);
3199  }
3200  }
3201 }
3202 
3203 /// Produces a zero-extended result of comparing two 64-bit values according to
3204 /// the passed condition code.
3205 SDValue
3206 IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3207  ISD::CondCode CC,
3208  int64_t RHSValue, SDLoc dl) {
3209  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3211  return SDValue();
3212  bool IsRHSZero = RHSValue == 0;
3213  bool IsRHSOne = RHSValue == 1;
3214  bool IsRHSNegOne = RHSValue == -1LL;
3215  switch (CC) {
3216  default: return SDValue();
3217  case ISD::SETEQ: {
3218  // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3219  // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3220  SDValue Xor = IsRHSZero ? LHS :
3221  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3222  SDValue Clz =
3223  SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
3224  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3225  S->getI64Imm(58, dl),
3226  S->getI64Imm(63, dl)), 0);
3227  }
3228  case ISD::SETNE: {
3229  // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3230  // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3231  // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3232  // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3233  SDValue Xor = IsRHSZero ? LHS :
3234  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3235  SDValue AC =
3236  SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3237  Xor, S->getI32Imm(~0U, dl)), 0);
3238  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3239  Xor, AC.getValue(1)), 0);
3240  }
3241  case ISD::SETGE: {
3242  // {subc.reg, subc.CA} = (subcarry %a, %b)
3243  // (zext (setcc %a, %b, setge)) ->
3244  // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3245  // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3246  if (IsRHSZero)
3247  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3248  std::swap(LHS, RHS);
3249  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3250  IsRHSZero = RHSConst && RHSConst->isNullValue();
3252  }
3253  case ISD::SETLE: {
3254  // {subc.reg, subc.CA} = (subcarry %b, %a)
3255  // (zext (setcc %a, %b, setge)) ->
3256  // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3257  // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63)
3258  if (IsRHSZero)
3259  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3260  SDValue ShiftL =
3261  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3262  S->getI64Imm(1, dl),
3263  S->getI64Imm(63, dl)), 0);
3264  SDValue ShiftR =
3265  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3266  S->getI64Imm(63, dl)), 0);
3267  SDValue SubtractCarry =
3268  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3269  LHS, RHS), 1);
3270  return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3271  ShiftR, ShiftL, SubtractCarry), 0);
3272  }
3273  case ISD::SETGT: {
3274  // {subc.reg, subc.CA} = (subcarry %b, %a)
3275  // (zext (setcc %a, %b, setgt)) ->
3276  // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3277  // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3278  if (IsRHSNegOne)
3279  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3280  if (IsRHSZero) {
3281  SDValue Addi =
3282  SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3283  S->getI64Imm(~0ULL, dl)), 0);
3284  SDValue Nor =
3285  SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
3286  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3287  S->getI64Imm(1, dl),
3288  S->getI64Imm(63, dl)), 0);
3289  }
3290  std::swap(LHS, RHS);
3291  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3292  IsRHSZero = RHSConst && RHSConst->isNullValue();
3293  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3295  }
3296  case ISD::SETLT: {
3297  // {subc.reg, subc.CA} = (subcarry %a, %b)
3298  // (zext (setcc %a, %b, setlt)) ->
3299  // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3300  // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3301  if (IsRHSOne)
3302  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3303  if (IsRHSZero)
3304  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3305  S->getI64Imm(1, dl),
3306  S->getI64Imm(63, dl)), 0);
3307  SDValue SRADINode =
3308  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3309  LHS, S->getI64Imm(63, dl)), 0);
3310  SDValue SRDINode =
3311  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3312  RHS, S->getI64Imm(1, dl),
3313  S->getI64Imm(63, dl)), 0);
3314  SDValue SUBFC8Carry =
3315  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3316  RHS, LHS), 1);
3317  SDValue ADDE8Node =
3318  SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3319  SRDINode, SRADINode, SUBFC8Carry), 0);
3320  return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3321  ADDE8Node, S->getI64Imm(1, dl)), 0);
3322  }
3323  case ISD::SETUGE:
3324  // {subc.reg, subc.CA} = (subcarry %a, %b)
3325  // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3326  std::swap(LHS, RHS);
3328  case ISD::SETULE: {
3329  // {subc.reg, subc.CA} = (subcarry %b, %a)
3330  // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3331  SDValue SUBFC8Carry =
3332  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3333  LHS, RHS), 1);
3334  SDValue SUBFE8Node =
3335  SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3336  LHS, LHS, SUBFC8Carry), 0);
3337  return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3338  SUBFE8Node, S->getI64Imm(1, dl)), 0);
3339  }
3340  case ISD::SETUGT:
3341  // {subc.reg, subc.CA} = (subcarry %b, %a)
3342  // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3343  std::swap(LHS, RHS);
3345  case ISD::SETULT: {
3346  // {subc.reg, subc.CA} = (subcarry %a, %b)
3347  // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3348  SDValue SubtractCarry =
3349  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3350  RHS, LHS), 1);
3351  SDValue ExtSub =
3352  SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3353  LHS, LHS, SubtractCarry), 0);
3354  return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3355  ExtSub), 0);
3356  }
3357  }
3358 }
3359 
3360 /// Produces a sign-extended result of comparing two 64-bit values according to
3361 /// the passed condition code.
///
/// Every sequence built here consists of MVT::i64 machine nodes. \p RHSValue
/// is only compared against 0, 1 and -1 in order to pick the shorter
/// special-case sequences. An empty SDValue is returned when the CmpInGPR
/// check at the top rejects the conversion, or when \p CC has no case in the
/// switch below.
3362 SDValue
3363 IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3364  ISD::CondCode CC,
3365  int64_t RHSValue, SDLoc dl) {
3366  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
// NOTE(review): the listing jumps from line 3366 to line 3368, so the rest of
// this condition appears to be missing from this copy - confirm against the
// upstream file before relying on it.
3368  return SDValue();
3369  bool IsRHSZero = RHSValue == 0;
3370  bool IsRHSOne = RHSValue == 1;
3371  bool IsRHSNegOne = RHSValue == -1LL;
3372  switch (CC) {
3373  default: return SDValue();
3374  case ISD::SETEQ: {
3375  // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3376  // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3377  // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3378  // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
// When comparing against zero the xor is skipped and LHS is used directly.
3379  SDValue AddInput = IsRHSZero ? LHS :
3380  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3381  SDValue Addic =
3382  SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3383  AddInput, S->getI32Imm(~0U, dl)), 0);
// Result 1 of the ADDIC8 node (its glue) feeds the carry into SUBFE8.
3384  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3385  Addic, Addic.getValue(1)), 0);
3386  }
3387  case ISD::SETNE: {
3388  // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3389  // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3390  // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3391  // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3392  SDValue Xor = IsRHSZero ? LHS :
3393  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3394  SDValue SC =
3395  SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3396  Xor, S->getI32Imm(0, dl)), 0);
3397  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3398  SC, SC.getValue(1)), 0);
3399  }
3400  case ISD::SETGE: {
3401  // {subc.reg, subc.CA} = (subcarry %a, %b)
3402  // (sext (setcc %a, %b, setge)) ->
3403  // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3404  // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3405  if (IsRHSZero)
3406  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
// a >= b is handled as b <= a: swap the operands, recompute the zero test for
// the new RHS, and continue into the SETLE case below (there is no return or
// break before the next case label).
3407  std::swap(LHS, RHS);
3408  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3409  IsRHSZero = RHSConst && RHSConst->isNullValue();
3411  }
3412  case ISD::SETLE: {
3413  // {subc.reg, subc.CA} = (subcarry %b, %a)
3414  // (sext (setcc %a, %b, setle)) ->
3415  // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3416  // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
3417  if (IsRHSZero)
3418  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
// ShiftR = (ashr RHS, 63)
3419  SDValue ShiftR =
3420  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3421  S->getI64Imm(63, dl)), 0);
// ShiftL = (lshr LHS, 63), expressed as rotate-left-by-1 keeping only bit 63.
3422  SDValue ShiftL =
3423  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3424  S->getI64Imm(1, dl),
3425  S->getI64Imm(63, dl)), 0);
// Only the carry (result 1) of the subtract is consumed.
3426  SDValue SubtractCarry =
3427  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3428  LHS, RHS), 1);
3429  SDValue Adde =
3430  SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3431  ShiftR, ShiftL, SubtractCarry), 0);
// Negating the 0/1 sum produces the sign-extended 0/-1 result.
3432  return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
3433  }
3434  case ISD::SETGT: {
3435  // {subc.reg, subc.CA} = (subcarry %b, %a)
3436  // (sext (setcc %a, %b, setgt)) ->
3437  // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3438  // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
// %a > -1 is the same test as %a >= 0, so reuse the compound zero comparison.
3439  if (IsRHSNegOne)
3440  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3441  if (IsRHSZero) {
3442  SDValue Add =
3443  SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3444  S->getI64Imm(-1, dl)), 0);
3445  SDValue Nor =
3446  SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
3447  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3448  S->getI64Imm(63, dl)), 0);
3449  }
// a > b is handled as b < a: swap the operands, recompute the constant tests
// for the new RHS, and continue into the SETLT case below (no return or break
// before the next case label).
3450  std::swap(LHS, RHS);
3451  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3452  IsRHSZero = RHSConst && RHSConst->isNullValue();
3453  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3455  }
3456  case ISD::SETLT: {
3457  // {subc.reg, subc.CA} = (subcarry %a, %b)
3458  // (sext (setcc %a, %b, setlt)) ->
3459  // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3460  // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
// %a < 1 is the same test as %a <= 0, so reuse the compound zero comparison.
3461  if (IsRHSOne)
3462  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3463  if (IsRHSZero) {
3464  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3465  S->getI64Imm(63, dl)), 0);
3466  }
3467  SDValue SRADINode =
3468  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3469  LHS, S->getI64Imm(63, dl)), 0);
3470  SDValue SRDINode =
3471  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3472  RHS, S->getI64Imm(1, dl),
3473  S->getI64Imm(63, dl)), 0);
3474  SDValue SUBFC8Carry =
3475  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3476  RHS, LHS), 1);
3477  SDValue ADDE8Node =
3478  SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3479  SRDINode, SRADINode, SUBFC8Carry), 0);
// Flip the low bit, then negate to turn 0/1 into 0/-1.
3480  SDValue XORI8Node =
3481  SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3482  ADDE8Node, S->getI64Imm(1, dl)), 0);
3483  return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3484  XORI8Node), 0);
3485  }
3486  case ISD::SETUGE:
3487  // {subc.reg, subc.CA} = (subcarry %a, %b)
3488  // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
// Swap the operands and continue into the SETULE case below.
3489  std::swap(LHS, RHS);
3491  case ISD::SETULE: {
3492  // {subc.reg, subc.CA} = (subcarry %b, %a)
3493  // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3494  SDValue SubtractCarry =
3495  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3496  LHS, RHS), 1);
3497  SDValue ExtSub =
3498  SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3499  LHS, SubtractCarry), 0);
// NOR of a value with itself is its bitwise complement.
3500  return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3501  ExtSub, ExtSub), 0);
3502  }
3503  case ISD::SETUGT:
3504  // {subc.reg, subc.CA} = (subcarry %b, %a)
3505  // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
// Swap the operands and continue into the SETULT case below.
3506  std::swap(LHS, RHS);
3508  case ISD::SETULT: {
3509  // {subc.reg, subc.CA} = (subcarry %a, %b)
3510  // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3511  SDValue SubCarry =
3512  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3513  RHS, LHS), 1);
3514  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3515  LHS, LHS, SubCarry), 0);
3516  }
3517  }
3518 }
3519 
3520 /// Do all uses of this SDValue need the result in a GPR?
3521 /// This is meant to be used on values that have type i1 since
3522 /// it is somewhat meaningless to ask if values of other types
3523 /// should be kept in GPR's.
3524 static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3525  assert(Compare.getOpcode() == ISD::SETCC &&
3526  "An ISD::SETCC node required here.");
3527 
3528  // For values that have a single use, the caller should obviously already have
3529  // checked if that use is an extending use. We check the other uses here.
3530  if (Compare.hasOneUse())
3531  return true;
3532  // We want the value in a GPR if it is being extended, used for a select, or
3533  // used in logical operations.
3534  for (auto CompareUse : Compare.getNode()->uses())
3535  if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3536  CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3537  CompareUse->getOpcode() != ISD::SELECT &&
3538  !isLogicOp(CompareUse->getOpcode())) {
3539  OmittedForNonExtendUses++;
3540  return false;
3541  }
3542  return true;
3543 }
3544 
3545 /// Returns an equivalent of a SETCC node but with the result the same width as
3546 /// the inputs. This can also be used for SELECT_CC if either the true or false
3547 /// values is a power of two while the other is zero.
3548 SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
3549  SetccInGPROpts ConvOpts) {
3550  assert((Compare.getOpcode() == ISD::SETCC ||
3551  Compare.getOpcode() == ISD::SELECT_CC) &&
3552  "An ISD::SETCC node required here.");
3553 
3554  // Don't convert this comparison to a GPR sequence because there are uses
3555  // of the i1 result (i.e. uses that require the result in the CR).
3556  if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
3557  return SDValue();
3558 
3559  SDValue LHS = Compare.getOperand(0);
3560  SDValue RHS = Compare.getOperand(1);
3561 
3562  // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
3563  int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
3564  ISD::CondCode CC =
3565  cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
3566  EVT InputVT = LHS.getValueType();
3567  if (InputVT != MVT::i32 && InputVT != MVT::i64)
3568  return SDValue();
3569 
3570  if (ConvOpts == SetccInGPROpts::ZExtInvert ||
3571  ConvOpts == SetccInGPROpts::SExtInvert)
3572  CC = ISD::getSetCCInverse(CC, true);
3573 
3574  bool Inputs32Bit = InputVT == MVT::i32;
3575 
3576  SDLoc dl(Compare);
3577  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3578  int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
3579  bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
3580  ConvOpts == SetccInGPROpts::SExtInvert;
3581 
3582  if (IsSext && Inputs32Bit)
3583  return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3584  else if (Inputs32Bit)
3585  return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3586  else if (IsSext)
3587  return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3588  return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3589 }
3590 
3591 } // end anonymous namespace
3592 
3593 bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
3594  if (N->getValueType(0) != MVT::i32 &&
3595  N->getValueType(0) != MVT::i64)
3596  return false;
3597 
3598  // This optimization will emit code that assumes 64-bit registers
3599  // so we don't want to run it in 32-bit mode. Also don't run it
3600  // on functions that are not to be optimized.
3601  if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
3602  return false;
3603 
3604  switch (N->getOpcode()) {
3605  default: break;
3606  case ISD::ZERO_EXTEND:
3607  case ISD::SIGN_EXTEND:
3608  case ISD::AND:
3609  case ISD::OR:
3610  case ISD::XOR: {
3611  IntegerCompareEliminator ICmpElim(CurDAG, this);
3612  if (SDNode *New = ICmpElim.Select(N)) {
3613  ReplaceNode(N, New);
3614  return true;
3615  }
3616  }
3617  }
3618  return false;
3619 }
3620 
3621 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
3622  if (N->getValueType(0) != MVT::i32 &&
3623  N->getValueType(0) != MVT::i64)
3624  return false;
3625 
3626  if (!UseBitPermRewriter)
3627  return false;
3628 
3629  switch (N->getOpcode()) {
3630  default: break;
3631  case ISD::ROTL:
3632  case ISD::SHL:
3633  case ISD::SRL:
3634  case ISD::AND:
3635  case ISD::OR: {
3636  BitPermutationSelector BPS(CurDAG);
3637  if (SDNode *New = BPS.Select(N)) {
3638  ReplaceNode(N, New);
3639  return true;
3640  }
3641  return false;
3642  }
3643  }
3644 
3645  return false;
3646 }
3647 
3648 /// SelectCC - Select a comparison of the specified values with the specified
3649 /// condition code, returning the CR# of the expression.
///
/// The compare opcode is chosen from the type of \p LHS (i32, i64, f32, f64,
/// otherwise asserted to be f128) and from \p CC. For the integer types a
/// constant \p RHS that fits the instruction's 16-bit immediate field is
/// folded into an immediate-form compare and returned straight away; every
/// other case ends in the register-register compare created at the bottom.
/// All compare nodes created here have result type MVT::i32 or MVT::i64 as
/// written at each call.
3650 SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3651  const SDLoc &dl) {
3652  // Always select the LHS.
3653  unsigned Opc;
3654 
3655  if (LHS.getValueType() == MVT::i32) {
3656  unsigned Imm;
// Equality tests do not care about signedness, so either immediate form works.
3657  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3658  if (isInt32Immediate(RHS, Imm)) {
3659  // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3660  if (isUInt<16>(Imm))
3661  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
3662  getI32Imm(Imm & 0xFFFF, dl)),
3663  0);
3664  // If this is a 16-bit signed immediate, fold it.
3665  if (isInt<16>((int)Imm))
3666  return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
3667  getI32Imm(Imm & 0xFFFF, dl)),
3668  0);
3669 
3670  // For non-equality comparisons, the default code would materialize the
3671  // constant, then compare against it, like this:
3672  // lis r2, 4660
3673  // ori r2, r2, 22136
3674  // cmpw cr0, r3, r2
3675  // Since we are just comparing for equality, we can emit this instead:
3676  // xoris r0,r3,0x1234
3677  // cmplwi cr0,r0,0x5678
3678  // beq cr0,L6
// The high 16 bits are cancelled with XORIS, the low 16 bits are compared.
3679  SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
3680  getI32Imm(Imm >> 16, dl)), 0);
3681  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
3682  getI32Imm(Imm & 0xFFFF, dl)), 0);
3683  }
// RHS is not a constant: use the logical register-register compare.
3684  Opc = PPC::CMPLW;
3685  } else if (ISD::isUnsignedIntSetCC(CC)) {
// Unsigned ordering: only an unsigned 16-bit immediate can be folded.
3686  if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
3687  return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
3688  getI32Imm(Imm & 0xFFFF, dl)), 0);
3689  Opc = PPC::CMPLW;
3690  } else {
// Signed ordering: only a signed 16-bit immediate can be folded.
3691  int16_t SImm;
3692  if (isIntS16Immediate(RHS, SImm))
3693  return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
3694  getI32Imm((int)SImm & 0xFFFF,
3695  dl)),
3696  0);
3697  Opc = PPC::CMPW;
3698  }
3699  } else if (LHS.getValueType() == MVT::i64) {
// Same structure as the i32 case, using the doubleword compare opcodes.
3700  uint64_t Imm;
3701  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3702  if (isInt64Immediate(RHS.getNode(), Imm)) {
3703  // SETEQ/SETNE comparison with 16-bit immediate, fold it.
// NOTE(review): the two folds below build the immediate with getI32Imm while
// the other i64 paths in this function use getI64Imm - confirm this is
// intentional.
3704  if (isUInt<16>(Imm))
3705  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
3706  getI32Imm(Imm & 0xFFFF, dl)),
3707  0);
3708  // If this is a 16-bit signed immediate, fold it.
3709  if (isInt<16>(Imm))
3710  return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
3711  getI32Imm(Imm & 0xFFFF, dl)),
3712  0);
3713 
3714  // For non-equality comparisons, the default code would materialize the
3715  // constant, then compare against it, like this:
3716  // lis r2, 4660
3717  // ori r2, r2, 22136
3718  // cmpd cr0, r3, r2
3719  // Since we are just comparing for equality, we can emit this instead:
3720  // xoris r0,r3,0x1234
3721  // cmpldi cr0,r0,0x5678
3722  // beq cr0,L6
// The xoris trick is only applied when the constant fits in 32 unsigned bits;
// wider constants fall through to the register-register compare.
3723  if (isUInt<32>(Imm)) {
3724  SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
3725  getI64Imm(Imm >> 16, dl)), 0);
3726  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
3727  getI64Imm(Imm & 0xFFFF, dl)),
3728  0);
3729  }
3730  }
3731  Opc = PPC::CMPLD;
3732  } else if (ISD::isUnsignedIntSetCC(CC)) {
3733  if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
3734  return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
3735  getI64Imm(Imm & 0xFFFF, dl)), 0);
3736  Opc = PPC::CMPLD;
3737  } else {
3738  int16_t SImm;
3739  if (isIntS16Immediate(RHS, SImm))
3740  return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
3741  getI64Imm(SImm & 0xFFFF, dl)),
3742  0);
3743  Opc = PPC::CMPD;
3744  }
3745  } else if (LHS.getValueType() == MVT::f32) {
// With SPE the opcode depends on the condition: the EQ, LT or GT flavour of
// the single-precision compare. Without SPE, FCMPUS is always used.
3746  if (PPCSubTarget->hasSPE()) {
3747  switch (CC) {
3748  default:
3749  case ISD::SETEQ:
3750  case ISD::SETNE:
3751  Opc = PPC::EFSCMPEQ;
3752  break;
3753  case ISD::SETLT:
3754  case ISD::SETGE:
3755  case ISD::SETOLT:
3756  case ISD::SETOGE:
3757  case ISD::SETULT:
3758  case ISD::SETUGE:
3759  Opc = PPC::EFSCMPLT;
3760  break;
3761  case ISD::SETGT:
3762  case ISD::SETLE:
3763  case ISD::SETOGT:
3764  case ISD::SETOLE:
3765  case ISD::SETUGT:
3766  case ISD::SETULE:
3767  Opc = PPC::EFSCMPGT;
3768  break;
3769  }
3770  } else
3771  Opc = PPC::FCMPUS;
3772  } else if (LHS.getValueType() == MVT::f64) {
// Double-precision counterpart of the f32 case; without SPE the VSX compare
// is preferred when VSX is available.
3773  if (PPCSubTarget->hasSPE()) {
3774  switch (CC) {
3775  default:
3776  case ISD::SETEQ:
3777  case ISD::SETNE:
3778  Opc = PPC::EFDCMPEQ;
3779  break;
3780  case ISD::SETLT:
3781  case ISD::SETGE:
3782  case ISD::SETOLT:
3783  case ISD::SETOGE:
3784  case ISD::SETULT:
3785  case ISD::SETUGE:
3786  Opc = PPC::EFDCMPLT;
3787  break;
3788  case ISD::SETGT:
3789  case ISD::SETLE:
3790  case ISD::SETOGT:
3791  case ISD::SETOLE:
3792  case ISD::SETUGT:
3793  case ISD::SETULE:
3794  Opc = PPC::EFDCMPGT;
3795  break;
3796  }
3797  } else
3798  Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
3799  } else {
// The only remaining type accepted here is f128.
3800  assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
3801  assert(PPCSubTarget->hasVSX() && "__float128 requires VSX");
3802  Opc = PPC::XSCMPUQP;
3803  }
// No immediate form applied: emit the two-operand compare chosen above.
3804  return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
3805 }
3806 
// NOTE(review): the listing jumps from line 3805 to line 3808, so the
// signature line of this function is not present in this copy - restore it
// from the upstream file. What follows is the body only: a switch that maps
// an ISD condition code `CC` to a PPC::PRED_* value. The ordered, unordered
// and plain spellings of a condition share one predicate. SETUEQ, SETONE,
// SETOLE and SETOGE are expected to have been removed by legalization and
// reach llvm_unreachable, as does any code without a case.
3808  switch (CC) {
3809  case ISD::SETUEQ:
3810  case ISD::SETONE:
3811  case ISD::SETOLE:
3812  case ISD::SETOGE:
3813  llvm_unreachable("Should be lowered by legalize!");
3814  default: llvm_unreachable("Unknown condition!");
3815  case ISD::SETOEQ:
3816  case ISD::SETEQ: return PPC::PRED_EQ;
3817  case ISD::SETUNE:
3818  case ISD::SETNE: return PPC::PRED_NE;
3819  case ISD::SETOLT:
3820  case ISD::SETLT: return PPC::PRED_LT;
3821  case ISD::SETULE:
3822  case ISD::SETLE: return PPC::PRED_LE;
3823  case ISD::SETOGT:
3824  case ISD::SETGT: return PPC::PRED_GT;
3825  case ISD::SETUGE:
3826  case ISD::SETGE: return PPC::PRED_GE;
3827  case ISD::SETO: return PPC::PRED_NU;
3828  case ISD::SETUO: return PPC::PRED_UN;
3829  // These two are invalid for floating point. Assume we have int.
3830  case ISD::SETULT: return PPC::PRED_LT;
3831  case ISD::SETUGT: return PPC::PRED_GT;
3832  }
3833 }
3834 
3835 /// getCRIdxForSetCC - Return the index of the condition register field
3836 /// associated with the SetCC condition, and whether or not the field is
3837 /// treated as inverted. That is, lt = 0; ge = 0 inverted.
3838 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
3839  Invert = false;
3840  switch (CC) {
3841  default: llvm_unreachable("Unknown condition!");
3842  case ISD::SETOLT:
3843  case ISD::SETLT: return 0; // Bit #0 = SETOLT
3844  case ISD::SETOGT:
3845  case ISD::SETGT: return 1; // Bit #1 = SETOGT
3846  case ISD::SETOEQ:
3847  case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
3848  case ISD::SETUO: return 3; // Bit #3 = SETUO
3849  case ISD::SETUGE:
3850  case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
3851  case ISD::SETULE:
3852  case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
3853  case ISD::SETUNE:
3854  case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
3855  case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
3856  case ISD::SETUEQ:
3857  case ISD::SETOGE:
3858  case ISD::SETOLE:
3859  case ISD::SETONE:
3860  llvm_unreachable("Invalid branch code: should be expanded by legalize");
3861  // These are invalid for floating point. Assume integer.
3862  case ISD::SETULT: return 0;
3863  case ISD::SETUGT: return 1;
3864  }
3865 }
3866 
3867 // getVCmpInst: return the vector compare instruction for the specified
3868 // vector type and condition code. Since this is for altivec specific code,
3869 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
// The floating-point path below additionally returns VSX opcodes for v2f64.
//
// Only "equal", "greater than" and (for floating point) "greater or equal"
// compares are returned; every other condition is first rewritten into one of
// those. Swap and Negate are reset to false on entry and then set to tell the
// caller what it must do to get the requested condition:
//   Swap   - exchange the two compare operands.
//   Negate - invert the compare result.
// HasVSX selects the VSX opcode over the Altivec one for v4f32. Any type or
// condition combination that is not returned ends in llvm_unreachable.
3870 static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
3871  bool HasVSX, bool &Swap, bool &Negate) {
3872  Swap = false;
3873  Negate = false;
3874 
3875  if (VecVT.isFloatingPoint()) {
3876  /* Handle some cases by swapping input operands. */
3877  switch (CC) {
3878  case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
3879  case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
3880  case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
3881  case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
3882  case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
3883  case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
3884  default: break;
3885  }
3886  /* Handle some cases by negating the result. */
// This runs on the already swapped condition, so e.g. SETUGE becomes SETULE
// above and then SETOGT with both Swap and Negate set.
3887  switch (CC) {
3888  case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
3889  case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
3890  case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
3891  case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
3892  default: break;
3893  }
3894  /* We have instructions implementing the remaining cases. */
3895  switch (CC) {
3896  case ISD::SETEQ:
3897  case ISD::SETOEQ:
3898  if (VecVT == MVT::v4f32)
3899  return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
3900  else if (VecVT == MVT::v2f64)
3901  return PPC::XVCMPEQDP;
3902  break;
3903  case ISD::SETGT:
3904  case ISD::SETOGT:
3905  if (VecVT == MVT::v4f32)
3906  return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
3907  else if (VecVT == MVT::v2f64)
3908  return PPC::XVCMPGTDP;
3909  break;
3910  case ISD::SETGE:
3911  case ISD::SETOGE:
3912  if (VecVT == MVT::v4f32)
3913  return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
3914  else if (VecVT == MVT::v2f64)
3915  return PPC::XVCMPGEDP;
3916  break;
3917  default:
3918  break;
3919  }
3920  llvm_unreachable("Invalid floating-point vector compare condition");
3921  } else {
3922  /* Handle some cases by swapping input operands. */
3923  switch (CC) {
3924  case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
3925  case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
3926  case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
3927  case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
3928  default: break;
3929  }
3930  /* Handle some cases by negating the result. */
// As above, this sees the swapped condition: SETGE becomes SETLE and then
// SETGT with both Swap and Negate set.
3931  switch (CC) {
3932  case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
3933  case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
3934  case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
3935  case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
3936  default: break;
3937  }
3938  /* We have instructions implementing the remaining cases. */
// The opcode suffix follows the element width: B/H/W/D for 8/16/32/64 bits.
3939  switch (CC) {
3940  case ISD::SETEQ:
3941  case ISD::SETUEQ:
3942  if (VecVT == MVT::v16i8)
3943  return PPC::VCMPEQUB;
3944  else if (VecVT == MVT::v8i16)
3945  return PPC::VCMPEQUH;
3946  else if (VecVT == MVT::v4i32)
3947  return PPC::VCMPEQUW;
3948  else if (VecVT == MVT::v2i64)
3949  return PPC::VCMPEQUD;
3950  break;
3951  case ISD::SETGT:
3952  if (VecVT == MVT::v16i8)
3953  return PPC::VCMPGTSB;
3954  else if (VecVT == MVT::v8i16)
3955  return PPC::VCMPGTSH;
3956  else if (VecVT == MVT::v4i32)
3957  return PPC::VCMPGTSW;
3958  else if (VecVT == MVT::v2i64)
3959  return PPC::VCMPGTSD;
3960  break;
3961  case ISD::SETUGT:
3962  if (VecVT == MVT::v16i8)
3963  return PPC::VCMPGTUB;
3964  else if (VecVT == MVT::v8i16)
3965  return PPC::VCMPGTUH;
3966  else if (VecVT == MVT::v4i32)
3967  return PPC::VCMPGTUW;
3968  else if (VecVT == MVT::v2i64)
3969  return PPC::VCMPGTUD;
3970  break;
3971  default:
3972  break;
3973  }
3974  llvm_unreachable("Invalid integer vector compare condition");
3975  }
3976 }
3977 
// Try to select the ISD::SETCC node N by hand.
//
// Three strategies are tried in order:
//   1. When CR bits are not in use and the RHS is the 32-bit constant 0 or -1,
//      a short branch-free GPR sequence is emitted for some condition codes.
//   2. Vector compares are mapped to a vector compare instruction via
//      getVCmpInst (optionally with swapped operands and a NOR to negate).
//   3. Otherwise the compare is done through SelectCC into CR7, the CR field
//      is moved to a GPR and the relevant bit is extracted.
// Returns true when N has been morphed with SelectNodeTo. Returns false for
// vector compares when the subtarget has QPX or SPE, and for scalar compares
// when the subtarget uses CR bits.
3978 bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
3979  SDLoc dl(N);
3980  unsigned Imm;
3981  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3982  EVT PtrVT =
3983  CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
3984  bool isPPC64 = (PtrVT == MVT::i64);
3985 
3986  if (!PPCSubTarget->useCRBits() &&
3987  isInt32Immediate(N->getOperand(1), Imm)) {
3988  // We can codegen setcc op, imm very efficiently compared to a brcond.
3989  // Check for those cases here.
3990  // setcc op, 0
3991  if (Imm == 0) {
3992  SDValue Op = N->getOperand(0);
3993  switch (CC) {
3994  default: break;
3995  case ISD::SETEQ: {
// op == 0: count leading zeros, then rotate left by 27 and keep bits 5..31,
// which is a logical shift right by 5.
3996  Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
3997  SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
3998  getI32Imm(31, dl) };
3999  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4000  return true;
4001  }
4002  case ISD::SETNE: {
// op != 0: the carry-based sequence is skipped when pointers are 64-bit; the
// generic path at the bottom handles it instead.
4003  if (isPPC64) break;
4004  SDValue AD =
4005  SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4006  Op, getI32Imm(~0U, dl)), 0);
4007  CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
4008  return true;
4009  }
4010  case ISD::SETLT: {
// op < 0: rotate the sign bit into bit 31 and mask off everything else.
4011  SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4012  getI32Imm(31, dl) };
4013  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4014  return true;
4015  }
4016  case ISD::SETGT: {
// op > 0: (-op & ~op) has its sign bit set only for positive op; extract it.
4017  SDValue T =
4018  SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
4019  T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
4020  SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
4021  getI32Imm(31, dl) };
4022  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4023  return true;
4024  }
4025  }
4026  } else if (Imm == ~0U) { // setcc op, -1
4027  SDValue Op = N->getOperand(0);
4028  switch (CC) {
4029  default: break;
4030  case ISD::SETEQ:
// op == -1: add 1 with carry, then add that carry to a zero loaded with LI.
// Skipped when pointers are 64-bit.
4031  if (isPPC64) break;
4032  Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4033  Op, getI32Imm(1, dl)), 0);
4034  CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
4035  SDValue(CurDAG->getMachineNode(PPC::LI, dl,
4036  MVT::i32,
4037  getI32Imm(0, dl)),
4038  0), Op.getValue(1));
4039  return true;
4040  case ISD::SETNE: {
// op != -1: complement op (NOR with itself) and reuse the "!= 0" sequence.
// Skipped when pointers are 64-bit.
4041  if (isPPC64) break;
4042  Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
4043  SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4044  Op, getI32Imm(~0U, dl));
4045  CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
4046  SDValue(AD, 1));
4047  return true;
4048  }
4049  case ISD::SETLT: {
// op < -1: ((op + 1) & op) keeps the sign bit only when both are negative.
4050  SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
4051  getI32Imm(1, dl)), 0);
4052  SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
4053  Op), 0);
4054  SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
4055  getI32Imm(31, dl) };
4056  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4057  return true;
4058  }
4059  case ISD::SETGT: {
// op > -1: extract the sign bit and flip it.
4060  SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4061  getI32Imm(31, dl) };
4062  Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
4063  CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
4064  return true;
4065  }
4066  }
4067  }
4068  }
4069 
4070  SDValue LHS = N->getOperand(0);
4071  SDValue RHS = N->getOperand(1);
4072 
4073  // Altivec Vector compare instructions do not set any CR register by default and
4074  // vector compare operations return the same type as the operands.
4075  if (LHS.getValueType().isVector()) {
4076  if (PPCSubTarget->hasQPX() || PPCSubTarget->hasSPE())
4077  return false;
4078 
4079  EVT VecVT = LHS.getValueType();
4080  bool Swap, Negate;
4081  unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
4082  PPCSubTarget->hasVSX(), Swap, Negate);
4083  if (Swap)
4084  std::swap(LHS, RHS);
4085 
// The result has the operand's shape with integer elements.
4086  EVT ResVT = VecVT.changeVectorElementTypeToInteger();
4087  if (Negate) {
// NOR of the compare result with itself inverts every lane.
4088  SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
4089  CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
4090  ResVT, VCmp, VCmp);
4091  return true;
4092  }
4093 
4094  CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
4095  return true;
4096  }
4097 
4098  if (PPCSubTarget->useCRBits())
4099  return false;
4100 
// Generic scalar path: compare into a CR field and extract one of its bits.
4101  bool Inv;
4102  unsigned Idx = getCRIdxForSetCC(CC, Inv);
4103  SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
4104  SDValue IntCR;
4105 
4106  // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
4107  // The correct compare instruction is already set by SelectCC()
4108  if (PPCSubTarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
4109  Idx = 1;
4110  }
4111 
4112  // Force the ccreg into CR7.
4113  SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
4114 
4115  SDValue InFlag(nullptr, 0); // Null incoming flag value.
4116  CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
4117  InFlag).getValue(1);
4118 
4119  IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
4120  CCReg), 0);
4121 
// Rotating left by (32 - (3 - Idx)) mod 32 is a rotate right by (3 - Idx);
// the mask 31..31 then keeps only the lowest bit.
4122  SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
4123  getI32Imm(31, dl), getI32Imm(31, dl) };
4124  if (!Inv) {
4125  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4126  return true;
4127  }
4128 
4129  // Get the specified bit.
4130  SDValue Tmp =
4131  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
// The condition is the complement of the extracted bit, so flip it.
4132  CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
4133  return true;
4134 }
4135 
4136 /// Does this node represent a load/store node whose address can be represented
4137 /// with a register plus an immediate that's a multiple of \p Val:
4138 bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
4139  LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
4140  StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
4141  SDValue AddrOp;
4142  if (LDN)
4143  AddrOp = LDN->getOperand(1);
4144  else if (STN)
4145  AddrOp = STN->getOperand(2);
4146 
4147  // If the address points a frame object or a frame object with an offset,
4148  // we need to check the object alignment.
4149  short Imm = 0;
4150  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4151  AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
4152  AddrOp)) {
4153  // If op0 is a frame index that is under aligned, we can't do it either,
4154  // because it is translated to r31 or r1 + slot + offset. We won't know the
4155  // slot number until the stack frame is finalized.
4156  const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4157  unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex());
4158  if ((SlotAlign % Val) != 0)
4159  return false;
4160 
4161  // If we have an offset, we need further check on the offset.
4162  if (AddrOp.getOpcode() != ISD::ADD)
4163  return true;
4164  }
4165 
4166  if (AddrOp.getOpcode() == ISD::ADD)
4167  return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
4168 
4169  // If the address comes from the outside, the offset will be zero.
4170  return AddrOp.getOpcode() == ISD::CopyFromReg;
4171 }
4172 
4173 void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
4174  // Transfer memoperands.
4175  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4176  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
4177 }
4178 
4179 static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
4180  bool &NeedSwapOps, bool &IsUnCmp) {
4181 
4182  assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
4183 
4184  SDValue LHS = N->getOperand(0);
4185  SDValue RHS = N->getOperand(1);
4186  SDValue TrueRes = N->getOperand(2);
4187  SDValue FalseRes = N->getOperand(3);
4188  ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
4189  if (!TrueConst)
4190  return false;
4191 
4192  assert((N->getSimpleValueType(0) == MVT::i64 ||
4193  N->getSimpleValueType(0) == MVT::i32) &&
4194  "Expecting either i64 or i32 here.");
4195 
4196  // We are looking for any of:
4197  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4198  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4199  // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
4200  // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
4201  int64_t TrueResVal = TrueConst->getSExtValue();
4202  if ((TrueResVal < -1 || TrueResVal > 1) ||
4203  (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
4204  (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
4205  (TrueResVal == 0 &&
4206  (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
4207  return false;
4208 
4209  bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC;
4210  SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0);
4211  if (SetOrSelCC.getOpcode() != ISD::SETCC &&
4212  SetOrSelCC.getOpcode() != ISD::SELECT_CC)
4213  return false;
4214 
4215  // Without this setb optimization, the outer SELECT_CC will be manually
4216  // selected to the SELECT_CC_I4/SELECT_CC_I8 pseudo; the expand-isel-pseudos
4217  // pass then transforms that pseudo instruction into an isel instruction. When
4218  // a result such as the zext/sext has more than one use, this optimization
4219  // only replaces the isel with a setb without any significant gain. Since
4220  // setb has a longer latency than the original isel, we should avoid this.
4221  // Another point is that setb requires the comparison to always be kept, which
4222  // could break the opportunity to eliminate the comparison in the future.
4223  if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
4224  return false;
4225 
4226  SDValue InnerLHS = SetOrSelCC.getOperand(0);
4227  SDValue InnerRHS = SetOrSelCC.getOperand(1);
4228  ISD::CondCode InnerCC =
4229  cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
4230  // If the inner comparison is a select_cc, make sure the true/false values are
4231  // 1/-1 and canonicalize it if needed.
4232  if (InnerIsSel) {
4233  ConstantSDNode *SelCCTrueConst =
4234  dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
4235  ConstantSDNode *SelCCFalseConst =
4236  dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
4237  if (!SelCCTrueConst || !SelCCFalseConst)
4238  return false;
4239  int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
4240  int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
4241  // The values must be -1/1 (requiring a swap) or 1/-1.
4242  if (SelCCTVal == -1 && SelCCFVal == 1) {
4243  std::swap(InnerLHS, InnerRHS);
4244  } else if (SelCCTVal != 1 || SelCCFVal != -1)
4245  return false;
4246  }
4247 
4248  // Canonicalize unsigned case
4249  if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
4250  IsUnCmp = true;
4251  InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
4252  }
4253 
4254  bool InnerSwapped = false;
4255  if (LHS == InnerRHS && RHS == InnerLHS)
4256  InnerSwapped = true;
4257  else if (LHS != InnerLHS || RHS != InnerRHS)
4258  return false;
4259 
4260  switch (CC) {
4261  // (select_cc lhs, rhs, 0,
4262  // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
4263  case ISD::SETEQ:
4264  if (!InnerIsSel)
4265  return false;
4266  if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
4267  return false;
4268  NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
4269  break;
4270 
4271  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4272  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
4273  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
4274  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4275  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
4276  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
4277  case ISD::SETULT:
4278  if (!IsUnCmp && InnerCC != ISD::SETNE)
4279  return false;
4280  IsUnCmp = true;
4282  case ISD::SETLT:
4283  if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
4284  (InnerCC == ISD::SETLT && InnerSwapped))
4285  NeedSwapOps = (TrueResVal == 1);
4286  else
4287  return false;
4288  break;
4289 
4290  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4291  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
4292  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
4293  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4294  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
4295  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
4296  case ISD::SETUGT:
4297  if (!IsUnCmp && InnerCC != ISD::SETNE)
4298  return false;
4299  IsUnCmp = true;
4301  case ISD::SETGT:
4302  if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
4303  (InnerCC == ISD::SETGT && InnerSwapped))
4304  NeedSwapOps = (TrueResVal == -1);
4305  else
4306  return false;
4307  break;
4308 
4309  default:
4310  return false;
4311  }
4312 
4313  LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
4314  LLVM_DEBUG(N->dump());
4315 
4316  return true;
4317 }
4318 
4319 // Select - Convert the specified operand from a target-independent to a
4320 // target-specific node if it hasn't already been changed.
4322  SDLoc dl(N);
4323  if (N->isMachineOpcode()) {
4324  N->setNodeId(-1);
4325  return; // Already selected.
4326  }
4327 
4328  // In case any misguided DAG-level optimizations form an ADD with a
4329  // TargetConstant operand, crash here instead of miscompiling (by selecting
4330  // an r+r add instead of some kind of r+i add).
4331  if (N->getOpcode() == ISD::ADD &&
4333  llvm_unreachable("Invalid ADD with TargetConstant operand");
4334 
4335  // Try matching complex bit permutations before doing anything else.
4336  if (tryBitPermutation(N))
4337  return;
4338 
4339  // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
4340  if (tryIntCompareInGPR(N))
4341  return;
4342 
4343  switch (N->getOpcode()) {
4344  default: break;
4345 
4346  case ISD::Constant:
4347  if (N->getValueType(0) == MVT::i64) {
4348  ReplaceNode(N, selectI64Imm(CurDAG, N));
4349  return;
4350  }
4351  break;
4352 
4353  case ISD::SETCC:
4354  if (trySETCC(N))
4355  return;
4356  break;
4357 
4358  case PPCISD::CALL: {
4359  const Module *M = MF->getFunction().getParent();
4360 
4361  if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
4362  (!TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt()) ||
4363  !PPCSubTarget->isTargetELF() || M->getPICLevel() == PICLevel::SmallPIC)
4364  break;
4365 
4366  SDValue Op = N->getOperand(1);
4367 
4368  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
4369  if (GA->getTargetFlags() == PPCII::MO_PLT)
4370  getGlobalBaseReg();
4371  }
4372  else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
4373  if (ES->getTargetFlags() == PPCII::MO_PLT)
4374  getGlobalBaseReg();
4375  }
4376  }
4377  break;
4378 
4379  case PPCISD::GlobalBaseReg:
4380  ReplaceNode(N, getGlobalBaseReg());
4381  return;
4382 
4383  case ISD::FrameIndex:
4384  selectFrameIndex(N, N);
4385  return;
4386 
4387  case PPCISD::MFOCRF: {
4388  SDValue InFlag = N->getOperand(1);
4389  ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
4390  N->getOperand(0), InFlag));
4391  return;
4392  }
4393 
4395  ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
4396  MVT::Other, N->getOperand(0)));
4397  return;
4398 
4399  case PPCISD::SRA_ADDZE: {
4400  SDValue N0 = N->getOperand(0);
4401  SDValue ShiftAmt =
4402  CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
4403  getConstantIntValue(), dl,
4404  N->getValueType(0));
4405  if (N->getValueType(0) == MVT::i64) {
4406  SDNode *Op =
4407  CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
4408  N0, ShiftAmt);
4409  CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
4410  SDValue(Op, 1));
4411  return;
4412  } else {
4413  assert(N->getValueType(0) == MVT::i32 &&
4414  "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
4415  SDNode *Op =
4416  CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
4417  N0, ShiftAmt);
4418  CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
4419  SDValue(Op, 1));
4420  return;
4421  }
4422  }
4423 
4424  case ISD::STORE: {
4425  // Change TLS initial-exec D-form stores to X-form stores.
4426  StoreSDNode *ST = cast<StoreSDNode>(N);
4427  if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() &&
4429  if (tryTLSXFormStore(ST))
4430  return;
4431  break;
4432  }
4433  case ISD::LOAD: {
4434  // Handle preincrement loads.
4435  LoadSDNode *LD = cast<LoadSDNode>(N);
4436  EVT LoadedVT = LD->getMemoryVT();
4437 
4438  // Normal loads are handled by code generated from the .td file.
4439  if (LD->getAddressingMode() != ISD::PRE_INC) {
4440  // Change TLS initial-exec D-form loads to X-form loads.
4441  if (EnableTLSOpt && PPCSubTarget->isELFv2ABI())
4442  if (tryTLSXFormLoad(LD))
4443  return;
4444  break;
4445  }
4446 
4447  SDValue Offset = LD->getOffset();
4448  if (Offset.getOpcode() == ISD::TargetConstant ||
4449  Offset.getOpcode() == ISD::TargetGlobalAddress) {
4450 
4451  unsigned Opcode;
4452  bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
4453  if (LD->getValueType(0) != MVT::i64) {
4454  // Handle PPC32 integer and normal FP loads.
4455  assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4456  switch (LoadedVT.getSimpleVT().SimpleTy) {
4457  default: llvm_unreachable("Invalid PPC load type!");
4458  case MVT::f64: Opcode = PPC::LFDU; break;
4459  case MVT::f32: Opcode = PPC::LFSU; break;
4460  case MVT::i32: Opcode = PPC::LWZU; break;
4461  case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
4462  case MVT::i1:
4463  case MVT::i8: Opcode = PPC::LBZU; break;
4464  }
4465  } else {
4466  assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
4467  assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4468  switch (LoadedVT.getSimpleVT().SimpleTy) {
4469  default: llvm_unreachable("Invalid PPC load type!");
4470  case MVT::i64: Opcode = PPC::LDU; break;
4471  case MVT::i32: Opcode = PPC::LWZU8; break;
4472  case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
4473  case MVT::i1:
4474  case MVT::i8: Opcode = PPC::LBZU8; break;
4475  }
4476  }
4477 
4478  SDValue Chain = LD->getChain();
4479  SDValue Base = LD->getBasePtr();
4480  SDValue Ops[] = { Offset, Base, Chain };
4481  SDNode *MN = CurDAG->getMachineNode(
4482  Opcode, dl, LD->getValueType(0),
4483  PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
4484  transferMemOperands(N, MN);
4485  ReplaceNode(N, MN);
4486  return;
4487  } else {
4488  unsigned Opcode;
4489  bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
4490  if (LD->getValueType(0) != MVT::i64) {
4491  // Handle PPC32 integer and normal FP loads.
4492  assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4493  switch (LoadedVT.getSimpleVT().SimpleTy) {
4494  default: llvm_unreachable("Invalid PPC load type!");
4495  case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
4496  case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
4497  case MVT::f64: Opcode = PPC::LFDUX; break;
4498  case MVT::f32: Opcode = PPC::LFSUX; break;
4499  case MVT::i32: Opcode = PPC::LWZUX; break;
4500  case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
4501  case MVT::i1:
4502  case MVT::i8: Opcode = PPC::LBZUX; break;
4503  }
4504  } else {
4505  assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
4506  assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
4507  "Invalid sext update load");
4508  switch (LoadedVT.getSimpleVT().SimpleTy) {
4509  default: llvm_unreachable("Invalid PPC load type!");
4510  case MVT::i64: Opcode = PPC::LDUX; break;
4511  case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
4512  case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
4513  case MVT::i1:
4514  case MVT::i8: Opcode = PPC::LBZUX8; break;
4515  }
4516  }
4517 
4518  SDValue Chain = LD->getChain();
4519  SDValue Base = LD->getBasePtr();
4520  SDValue Ops[] = { Base, Offset, Chain };
4521  SDNode *MN = CurDAG->getMachineNode(
4522  Opcode, dl, LD->getValueType(0),
4523  PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
4524  transferMemOperands(N, MN);
4525  ReplaceNode(N, MN);
4526  return;
4527  }
4528  }
4529 
4530  case ISD::AND: {
4531  unsigned Imm, Imm2, SH, MB, ME;
4532  uint64_t Imm64;
4533 
4534  // If this is an and of a value rotated between 0 and 31 bits and then and'd
4535  // with a mask, emit rlwinm
4536  if (isInt32Immediate(N->getOperand(1), Imm) &&
4537  isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
4538  SDValue Val = N->getOperand(0).getOperand(0);
4539  SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4540  getI32Imm(ME, dl) };
4541  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4542  return;
4543  }
4544  // If this is just a masked value where the input is not handled above, and
4545  // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4546  if (isInt32Immediate(N->getOperand(1), Imm) &&
4547  isRunOfOnes(Imm, MB, ME) &&
4548  N->getOperand(0).getOpcode() != ISD::ROTL) {
4549  SDValue Val = N->getOperand(0);
4550  SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
4551  getI32Imm(ME, dl) };
4552  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4553  return;
4554  }
4555  // If this is a 64-bit zero-extension mask, emit rldicl.
4556  if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
4557  isMask_64(Imm64)) {
4558  SDValue Val = N->getOperand(0);
4559  MB = 64 - countTrailingOnes(Imm64);
4560  SH = 0;
4561 
4562  if (Val.getOpcode() == ISD::ANY_EXTEND) {
4563  auto Op0 = Val.getOperand(0);
4564  if ( Op0.getOpcode() == ISD::SRL &&
4565  isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
4566 
4567  auto ResultType = Val.getNode()->getValueType(0);
4568  auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
4569  ResultType);
4570  SDValue IDVal (ImDef, 0);
4571 
4572  Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
4573  ResultType, IDVal, Op0.getOperand(0),
4574  getI32Imm(1, dl)), 0);
4575  SH = 64 - Imm;
4576  }
4577  }
4578 
4579  // If the operand is a logical right shift, we can fold it into this
4580  // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
4581  // for n <= mb. The right shift is really a left rotate followed by a
4582  // mask, and this mask is a more-restrictive sub-mask of the mask implied
4583  // by the shift.
4584  if (Val.getOpcode() == ISD::SRL &&
4585  isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
4586  assert(Imm < 64 && "Illegal shift amount");
4587  Val = Val.getOperand(0);
4588  SH = 64 - Imm;
4589  }
4590 
4591  SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
4592  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
4593  return;
4594  }
4595  // If this is a negated 64-bit zero-extension mask,
4596  // i.e. the immediate is a sequence of ones from the most significant side
4597  // and all zeros for the remainder, we should use rldicr.
4598  if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
4599  isMask_64(~Imm64)) {
4600  SDValue Val = N->getOperand(0);
4601  MB = 63 - countTrailingOnes(~Imm64);
4602  SH = 0;
4603  SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
4604  CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
4605  return;
4606  }
4607 
4608  // AND X, 0 -> 0, not "rlwinm 32".
4609  if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
4610  ReplaceUses(SDValue(N, 0), N->getOperand(1));
4611  return;
4612  }
4613  // ISD::OR doesn't get all the bitfield insertion fun.
4614  // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
4615  // bitfield insert.
4616  if (isInt32Immediate(N->getOperand(1), Imm) &&
4617  N->getOperand(0).getOpcode() == ISD::OR &&
4618  isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
4619  // The idea here is to check whether this is equivalent to:
4620  // (c1 & m) | (x & ~m)
4621  // where m is a run-of-ones mask. The logic here is that, for each bit in
4622  // c1 and c2:
4623  // - if both are 1, then the output will be 1.
4624  // - if both are 0, then the output will be 0.
4625  // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
4626  // come from x.
4627  // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
4628  // be 0.
4629  // If that last condition is never the case, then we can form m from the
4630  // bits that are the same between c1 and c2.
4631  unsigned MB, ME;
4632  if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
4633  SDValue Ops[] = { N->getOperand(0).getOperand(0),
4634  N->getOperand(0).getOperand(1),
4635  getI32Imm(0, dl), getI32Imm(MB, dl),
4636  getI32Imm(ME, dl) };
4637  ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
4638  return;
4639  }
4640  }
4641 
4642  // Other cases are autogenerated.
4643  break;
4644  }
4645  case ISD::OR: {
4646  if (N->getValueType(0) == MVT::i32)
4647  if (tryBitfieldInsert(N))
4648  return;
4649 
4650  int16_t Imm;
4651  if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
4652  isIntS16Immediate(N->getOperand(1), Imm)) {
4653  KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
4654 
4655  // If this is equivalent to an add, then we can fold it with the
4656  // FrameIndex calculation.
4657  if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
4658  selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
4659  return;
4660  }
4661  }
4662 
4663  // OR with a 32-bit immediate can be handled by ori + oris
4664  // without creating an immediate in a GPR.
4665  uint64_t Imm64 = 0;
4666  bool IsPPC64 = PPCSubTarget->isPPC64();
4667  if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
4668  (Imm64 & ~0xFFFFFFFFuLL) == 0) {
4669  // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
4670  uint64_t ImmHi = Imm64 >> 16;
4671  uint64_t ImmLo = Imm64 & 0xFFFF;
4672  if (ImmHi != 0 && ImmLo != 0) {
4673  SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
4674  N->getOperand(0),
4675  getI16Imm(ImmLo, dl));
4676  SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
4677  CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
4678  return;
4679  }
4680  }
4681 
4682  // Other cases are autogenerated.
4683  break;
4684  }
4685  case ISD::XOR: {
4686  // XOR with a 32-bit immediate can be handled by xori + xoris
4687  // without creating an immediate in a GPR.
4688  uint64_t Imm64 = 0;
4689  bool IsPPC64 = PPCSubTarget->isPPC64();
4690  if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
4691  (Imm64 & ~0xFFFFFFFFuLL) == 0) {
4692  // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
4693  uint64_t ImmHi = Imm64 >> 16;
4694  uint64_t ImmLo = Imm64 & 0xFFFF;
4695  if (ImmHi != 0 && ImmLo != 0) {
4696  SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
4697  N->getOperand(0),
4698  getI16Imm(ImmLo, dl));
4699  SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
4700  CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
4701  return;
4702  }
4703  }
4704 
4705  break;
4706  }
4707  case ISD::ADD: {
4708  int16_t Imm;
4709  if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
4710  isIntS16Immediate(N->getOperand(1), Imm)) {
4711  selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
4712  return;
4713  }
4714 
4715  break;
4716  }
4717  case ISD::SHL: {
4718  unsigned Imm, SH, MB, ME;
4719  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
4720  isRotateAndMask(N, Imm, true, SH, MB, ME)) {
4721  SDValue Ops[] = { N->getOperand(0).getOperand(0),
4722  getI32Imm(SH, dl), getI32Imm(MB, dl),
4723  getI32Imm(ME, dl) };
4724  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4725  return;
4726  }
4727 
4728  // Other cases are autogenerated.
4729  break;
4730  }
4731  case ISD::SRL: {
4732  unsigned Imm, SH, MB, ME;
4733  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
4734  isRotateAndMask(N, Imm, true, SH, MB, ME)) {
4735  SDValue Ops[] = { N->getOperand(0).getOperand(0),
4736  getI32Imm(SH, dl), getI32Imm(MB, dl),
4737  getI32Imm(ME, dl) };
4738  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4739  return;
4740  }
4741 
4742  // Other cases are autogenerated.
4743  break;
4744  }
4745  // FIXME: Remove this once the ANDI glue bug is fixed:
4747  case PPCISD::ANDIo_1_GT_BIT: {
4748  if (!ANDIGlueBug)
4749  break;
4750 
4751  EVT InVT = N->getOperand(0).getValueType();
4752  assert((InVT == MVT::i64 || InVT == MVT::i32) &&
4753  "Invalid input type for ANDIo_1_EQ_BIT");
4754 
4755  unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
4756  SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
4757  N->getOperand(0),
4758  CurDAG->getTargetConstant(1, dl, InVT)),
4759  0);
4760  SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
4761  SDValue SRIdxVal =
4762  CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
4763  PPC::sub_eq : PPC::sub_gt, dl, MVT::i32);
4764 
4765  CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
4766  SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
4767  return;
4768  }
4769  case ISD::SELECT_CC: {
4770  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
4771  EVT PtrVT =
4772  CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
4773  bool isPPC64 = (PtrVT == MVT::i64);
4774 
4775  // If this is a select of i1 operands, we'll pattern match it.
4776  if (PPCSubTarget->useCRBits() &&
4777  N->getOperand(0).getValueType() == MVT::i1)
4778  break;
4779 
4780  if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) {
4781  bool NeedSwapOps = false;
4782  bool IsUnCmp = false;
4783  if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
4784  SDValue LHS = N->getOperand(0);
4785  SDValue RHS = N->getOperand(1);
4786  if (NeedSwapOps)
4787  std::swap(LHS, RHS);
4788 
4789  // Make use of SelectCC to generate the comparison that sets the CR bits.
4790  // For equality comparisons with one literal operand, SelectCC may avoid
4791  // materializing the whole literal and just use xoris to check it first,
4792  // in which case the resulting comparison cannot exactly represent the
4793  // GT/LT relationship. To avoid this, we specify SETGT/SETUGT here instead
4794  // of SETEQ.
4795  SDValue GenCC =
4796  SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
4797  CurDAG->SelectNodeTo(
4798  N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
4799  N->getValueType(0), GenCC);
4800  NumP9Setb++;
4801  return;
4802  }
4803  }
4804 
4805  // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
4806  if (!isPPC64)
4807  if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
4808  if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
4809  if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
4810  if (N1C->isNullValue() && N3C->isNullValue() &&
4811  N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
4812  // FIXME: Implement this optzn for PPC64.
4813  N->getValueType(0) == MVT::i32) {
4814  SDNode *Tmp =
4815  CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4816  N->getOperand(0), getI32Imm(~0U, dl));
4817  CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
4818  N->getOperand(0), SDValue(Tmp, 1));
4819  return;
4820  }
4821 
4822  SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
4823 
4824  if (N->getValueType(0) == MVT::i1) {
4825  // An i1 select is: (c & t) | (!c & f).
4826  bool Inv;
4827  unsigned Idx = getCRIdxForSetCC(CC, Inv);
4828 
4829  unsigned SRI;
4830  switch (Idx) {
4831  default: llvm_unreachable("Invalid CC index");
4832  case 0: SRI = PPC::sub_lt; break;
4833  case 1: SRI = PPC::sub_gt; break;
4834  case 2: SRI = PPC::sub_eq; break;
4835  case 3: SRI = PPC::sub_un; break;
4836  }
4837 
4838  SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
4839 
4840  SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
4841  CCBit, CCBit), 0);
4842  SDValue C = Inv ? NotCCBit : CCBit,
4843  NotC = Inv ? CCBit : NotCCBit;
4844 
4845  SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
4846  C, N->getOperand(2)), 0);
4847  SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
4848  NotC, N->getOperand(3)), 0);
4849 
4850  CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
4851  return;
4852  }
4853 
4854  unsigned BROpc = getPredicateForSetCC(CC);
4855 
4856  unsigned SelectCCOp;
4857  if (N->getValueType(0) == MVT::i32)
4858  SelectCCOp = PPC::SELECT_CC_I4;
4859  else if (N->getValueType(0) == MVT::i64)
4860  SelectCCOp = PPC::SELECT_CC_I8;
4861  else if (N->getValueType(0) == MVT::f32) {
4862  if (PPCSubTarget->hasP8Vector())
4863  SelectCCOp = PPC::SELECT_CC_VSSRC;
4864  else if (PPCSubTarget->hasSPE())
4865  SelectCCOp = PPC::SELECT_CC_SPE4;
4866  else
4867  SelectCCOp = PPC::SELECT_CC_F4;
4868  } else if (N->getValueType(0) == MVT::f64) {
4869  if (PPCSubTarget->hasVSX())
4870  SelectCCOp = PPC::SELECT_CC_VSFRC;
4871  else if (PPCSubTarget->hasSPE())
4872  SelectCCOp = PPC::SELECT_CC_SPE;
4873  else
4874  SelectCCOp = PPC::SELECT_CC_F8;
4875  } else if (N->getValueType(0) == MVT::f128)
4876  SelectCCOp = PPC::SELECT_CC_F16;
4877  else if (PPCSubTarget->hasSPE())
4878  SelectCCOp = PPC::SELECT_CC_SPE;
4879  else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
4880  SelectCCOp = PPC::SELECT_CC_QFRC;
4881  else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
4882  SelectCCOp = PPC::SELECT_CC_QSRC;
4883  else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
4884  SelectCCOp = PPC::SELECT_CC_QBRC;
4885  else if (N->getValueType(0) == MVT::v2f64 ||
4886  N->getValueType(0) == MVT::v2i64)
4887  SelectCCOp = PPC::SELECT_CC_VSRC;
4888  else
4889  SelectCCOp = PPC::SELECT_CC_VRRC;
4890 
4891  SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
4892  getI32Imm(BROpc, dl) };
4893  CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
4894  return;
4895  }
4896  case ISD::VECTOR_SHUFFLE:
4897  if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
4898  N->getValueType(0) == MVT::v2i64)) {
4899  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
4900 
4901  SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
4902  Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
4903  unsigned DM[2];
4904 
4905  for (int i = 0; i < 2; ++i)
4906  if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
4907  DM[i] = 0;
4908  else
4909  DM[i] = 1;
4910 
4911  if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
4912  Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
4913  isa<LoadSDNode>(Op1.getOperand(0))) {
4914  LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
4915  SDValue Base, Offset;
4916 
4917  if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
4918  (LD->getMemoryVT() == MVT::f64 ||
4919  LD->getMemoryVT() == MVT::i64) &&
4920  SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
4921  SDValue Chain = LD->getChain();
4922  SDValue Ops[] = { Base, Offset, Chain };
4923  MachineMemOperand *MemOp = LD->getMemOperand();
4924  SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
4925  N->getValueType(0), Ops);
4926  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
4927  return;
4928  }
4929  }
4930 
4931  // For little endian, we must swap the input operands and adjust
4932  // the mask elements (reverse and invert them).
4933  if (PPCSubTarget->isLittleEndian()) {
4934  std::swap(Op1, Op2);
4935  unsigned tmp = DM[0];
4936  DM[0] = 1 - DM[1];
4937  DM[1] = 1 - tmp;
4938  }
4939 
4940  SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
4941  MVT::i32);
4942  SDValue Ops[] = { Op1, Op2, DMV };
4943  CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
4944  return;
4945  }
4946 
4947  break;
4948  case PPCISD::BDNZ:
4949  case PPCISD::BDZ: {
4950  bool IsPPC64 = PPCSubTarget->isPPC64();
4951  SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
4952  CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
4953  ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
4954  : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
4955  MVT::Other, Ops);
4956  return;
4957  }
4958  case PPCISD::COND_BRANCH: {
4959  // Op #0 is the Chain.
4960  // Op #1 is the PPC::PRED_* number.
4961  // Op #2 is the CR#
4962  // Op #3 is the Dest MBB
4963  // Op #4 is the Flag.
4964  // Prevent PPC::PRED_* from being selected into LI.
4965  unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
4966  if (EnableBranchHint)
4967  PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3));
4968 
4969  SDValue Pred = getI32Imm(PCC, dl);
4970  SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
4971  N->getOperand(0), N->getOperand(4) };
4972  CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4973  return;
4974  }
4975  case ISD::BR_CC: {
4976  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4977  unsigned PCC = getPredicateForSetCC(CC);
4978 
4979  if (N->getOperand(2).getValueType() == MVT::i1) {
4980  unsigned Opc;
4981  bool Swap;
4982  switch (PCC) {
4983  default: llvm_unreachable("Unexpected Boolean-operand predicate");
4984  case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
4985  case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
4986  case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
4987  case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
4988  case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
4989  case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
4990  }
4991 
4992  // A signed comparison of i1 values produces the opposite result to an
4993  // unsigned one if the condition code includes less-than or greater-than.
4994  // This is because 1 is the most negative signed i1 number and the most
4995  // positive unsigned i1 number. The CR-logical operations used for such
4996  // comparisons are non-commutative so for signed comparisons vs. unsigned
4997  // ones, the input operands just need to be swapped.
4998  if (ISD::isSignedIntSetCC(CC))
4999  Swap = !Swap;
5000 
5001  SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
5002  N->getOperand(Swap ? 3 : 2),
5003  N->getOperand(Swap ? 2 : 3)), 0);
5004  CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
5005  N->getOperand(0));
5006  return;
5007  }
5008 
5009  if (EnableBranchHint)
5010  PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4));
5011 
5012  SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
5013  SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
5014  N->getOperand(4), N->getOperand(0) };
5015  CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
5016  return;
5017  }
5018  case ISD::BRIND: {
5019  // FIXME: Should custom lower this.
5020  SDValue Chain = N->getOperand(0);
5021  SDValue Target = N->getOperand(1);
5022  unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
5023  unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
5024  Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
5025  Chain), 0);
5026  CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
5027  return;
5028  }
5029  case PPCISD::TOC_ENTRY: {
5030  assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
5031  "Only supported for 64-bit ABI and 32-bit SVR4");
5032  if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
5033  SDValue GA = N->getOperand(0);
5034  SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
5035  N->getOperand(1));
5036  transferMemOperands(N, MN);
5037  ReplaceNode(N, MN);
5038  return;
5039  }
5040 
5041  // For medium and large code model, we generate two instructions as
5042  // described below. Otherwise we allow SelectCodeCommon to handle this,
5043  // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
5044  CodeModel::Model CModel = TM.getCodeModel();
5045  if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
5046  break;
5047 
5048  // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
5049  // If it must be toc-referenced according to PPCSubTarget, we generate:
5050  // LDtocL(@sym, ADDIStocHA(%x2, @sym))
5051  // Otherwise we generate:
5052  // ADDItocL(ADDIStocHA(%x2, @sym), @sym)
5053  SDValue GA = N->getOperand(0);
5054  SDValue TOCbase = N->getOperand(1);
5055  SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
5056  TOCbase, GA);
5057  if (PPCLowering->isAccessedAsGotIndirect(GA)) {
5058  // If it is accessed as got-indirect, we need an extra LD to load
5059  // the address.
5060  SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
5061  SDValue(Tmp, 0));
5062  transferMemOperands(N, MN);
5063  ReplaceNode(N, MN);
5064  return;
5065  }
5066 
5067  // Build the address relative to the TOC-pointer.
5068  ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
5069  SDValue(Tmp, 0), GA));
5070  return;
5071  }
5072  case PPCISD::PPC32_PICGOT:
5073  // Generate a PIC-safe GOT reference.
5074  assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
5075  "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
5076  CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
5077  PPCLowering->getPointerTy(CurDAG->getDataLayout()),
5078  MVT::i32);
5079  return;
5080 
5081  case PPCISD::VADD_SPLAT: {
5082  // This expands into one of three sequences, depending on whether
5083  // the first operand is odd or even, positive or negative.
5084  assert(isa<ConstantSDNode>(N->getOperand(0)) &&
5085  isa<ConstantSDNode>(N->getOperand(1)) &&
5086  "Invalid operand on VADD_SPLAT!");
5087 
5088  int Elt = N->getConstantOperandVal(0);
5089  int EltSize = N->getConstantOperandVal(1);
5090  unsigned Opc1, Opc2, Opc3;
5091  EVT VT;
5092 
5093  if (EltSize == 1) {
5094  Opc1 = PPC::VSPLTISB;
5095  Opc2 = PPC::VADDUBM;
5096  Opc3 = PPC::VSUBUBM;
5097  VT = MVT::v16i8;
5098  } else if (EltSize == 2) {
5099  Opc1 = PPC::VSPLTISH;
5100  Opc2 = PPC::VADDUHM;
5101  Opc3 = PPC::VSUBUHM;
5102  VT = MVT::v8i16;
5103  } else {
5104  assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
5105  Opc1 = PPC::VSPLTISW;
5106  Opc2 = PPC::VADDUWM;
5107  Opc3 = PPC::VSUBUWM;
5108  VT = MVT::v4i32;
5109  }
5110 
5111  if ((Elt & 1) == 0) {
5112  // Elt is even, in the range [-32,-18] + [16,30].
5113  //
5114  // Convert: VADD_SPLAT elt, size
5115  // Into: tmp = VSPLTIS[BHW] elt
5116  // VADDU[BHW]M tmp, tmp
5117  // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
5118  SDValue EltVal = getI32Imm(Elt >> 1, dl);
5119  SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
5120  SDValue TmpVal = SDValue(Tmp, 0);
5121  ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
5122  return;
5123  } else if (Elt > 0) {
5124  // Elt is odd and positive, in the range [17,31].
5125  //
5126  // Convert: VADD_SPLAT elt, size
5127  // Into: tmp1 = VSPLTIS[BHW] elt-16
5128  // tmp2 = VSPLTIS[BHW] -16
5129  // VSUBU[BHW]M tmp1, tmp2
5130  SDValue EltVal = getI32Imm(Elt - 16, dl);
5131  SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
5132  EltVal = getI32Imm(-16, dl);
5133  SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
5134  ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
5135  SDValue(Tmp2, 0)));
5136  return;
5137  } else {
5138  // Elt is odd and negative, in the range [-31,-17].
5139  //
5140  // Convert: VADD_SPLAT elt, size
5141  // Into: tmp1 = VSPLTIS[BHW] elt+16
5142  // tmp2 = VSPLTIS[BHW] -16
5143  // VADDU[BHW]M tmp1, tmp2
5144  SDValue EltVal = getI32Imm(Elt + 16, dl);
5145  SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
5146  EltVal = getI32Imm(-16, dl);
5147  SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
5148  ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
5149  SDValue(Tmp2, 0)));
5150  return;
5151  }
5152  }
5153  }
5154 
5155  SelectCode(N);
5156 }
5157 
5158 // If the target supports the cmpb instruction, do the idiom recognition here.
5159 // We don't do this as a DAG combine because we don't want to do it as nodes
5160 // are being combined (because we might miss part of the eventual idiom). We
5161 // don't want to do it during instruction selection because we want to reuse
5162 // the logic for lowering the masking operations already part of the
5163 // instruction selector.
5164 SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
5165  SDLoc dl(N);
5166 
5167  assert(N->getOpcode() == ISD::OR &&
5168  "Only OR nodes are supported for CMPB");
5169 
5170  SDValue Res;
5171  if (!PPCSubTarget->hasCMPB())
5172  return Res;
5173 
5174  if (N->getValueType(0) != MVT::i32 &&
5175  N->getValueType(0) != MVT::i64)
5176  return Res;
5177 
5178  EVT VT = N->getValueType(0);
5179 
5180  SDValue RHS, LHS;
5181  bool BytesFound[8] = {false, false, false, false, false, false, false, false};
5182  uint64_t Mask = 0, Alt = 0;
5183 
5184  auto IsByteSelectCC = [this](SDValue O, unsigned &b,
5185  uint64_t &Mask, uint64_t &Alt,
5186  SDValue &LHS, SDValue &RHS) {
5187  if (O.getOpcode() != ISD::SELECT_CC)
5188  return false;
5189  ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
5190 
5191  if (!isa<ConstantSDNode>(O.getOperand(2)) ||
5192  !isa<ConstantSDNode>(O.getOperand(3)))
5193  return false;
5194 
5195  uint64_t PM = O.getConstantOperandVal(2);
5196  uint64_t PAlt = O.getConstantOperandVal(3);
5197  for (b = 0; b < 8; ++b) {
5198  uint64_t Mask = UINT64_C(0xFF) << (8*b);
5199  if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
5200  break;
5201  }
5202 
5203  if (b == 8)
5204  return false;
5205  Mask |= PM;
5206  Alt |= PAlt;
5207 
5208  if (!isa<ConstantSDNode>(O.getOperand(1)) ||
5209  O.getConstantOperandVal(1) != 0) {
5210  SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
5211  if (Op0.getOpcode() == ISD::TRUNCATE)
5212  Op0 = Op0.getOperand(0);
5213  if (Op1.getOpcode() == ISD::TRUNCATE)
5214  Op1 = Op1.getOperand(0);
5215 
5216  if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
5217  Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
5218  isa<ConstantSDNode>(Op0.getOperand(1))) {
5219 
5220  unsigned Bits = Op0.getValueSizeInBits();
5221  if (b != Bits/8-1)
5222  return false;
5223  if (Op0.getConstantOperandVal(1) != Bits-8)
5224  return false;
5225 
5226  LHS = Op0.getOperand(0);
5227  RHS = Op1.getOperand(0);
5228  return true;
5229  }
5230 
5231  // When we have small integers (i16 to be specific), the form present
5232  // post-legalization uses SETULT in the SELECT_CC for the
5233  // higher-order byte, depending on the fact that the
5234  // even-higher-order bytes are known to all be zero, for example:
5235  // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
5236  // (so when the second byte is the same, because all higher-order
5237  // bits from bytes 3 and 4 are known to be zero, the result of the
5238  // xor can be at most 255)
5239  if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
5240  isa<ConstantSDNode>(O.getOperand(1))) {
5241 
5242  uint64_t ULim = O.getConstantOperandVal(1);
5243  if (ULim != (UINT64_C(1) << b*8))
5244  return false;
5245 
5246  // Now we need to make sure that the upper bytes are known to be
5247  // zero.
5248  unsigned Bits = Op0.getValueSizeInBits();
5249  if (!CurDAG->MaskedValueIsZero(
5250  Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
5251  return false;
5252 
5253  LHS = Op0.getOperand(0);
5254  RHS = Op0.getOperand(1);
5255  return true;
5256  }
5257 
5258  return false;
5259  }
5260 
5261  if (CC != ISD::SETEQ)
5262  return false;
5263 
5264  SDValue Op = O.getOperand(0);
5265  if (Op.getOpcode() == ISD::AND) {
5266  if (!isa<ConstantSDNode>(Op.getOperand(1)))
5267  return false;
5268  if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
5269  return false;
5270 
5271  SDValue XOR = Op.getOperand(0);
5272  if (XOR.getOpcode() == ISD::TRUNCATE)
5273  XOR = XOR.getOperand(0);
5274  if (XOR.getOpcode() != ISD::XOR)
5275  return false;
5276 
5277  LHS = XOR.getOperand(0);
5278  RHS = XOR.getOperand(1);
5279  return true;
5280  } else if (Op.getOpcode() == ISD::SRL) {
5281  if (!isa<ConstantSDNode>(Op.getOperand(1)))
5282  return false;
5283  unsigned Bits = Op.getValueSizeInBits();
5284  if (b != Bits/8-1)
5285  return false;
5286  if (Op.getConstantOperandVal(1) != Bits-8)
5287  return false;
5288 
5289  SDValue XOR = Op.getOperand(0);
5290  if (XOR.getOpcode() == ISD::TRUNCATE)
5291  XOR = XOR.getOperand(0);
5292  if (XOR.getOpcode() != ISD::XOR)
5293  return false;
5294 
5295  LHS = XOR.getOperand(0);
5296  RHS = XOR.getOperand(1);
5297  return true;
5298  }
5299 
5300  return false;
5301  };
5302 
5303  SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
5304  while (!Queue.empty()) {
5305  SDValue V = Queue.pop_back_val();
5306 
5307  for (const SDValue &O : V.getNode()->ops()) {
5308  unsigned b;
5309  uint64_t M = 0, A = 0;
5310  SDValue OLHS, ORHS;
5311  if (O.getOpcode() == ISD::OR) {
5312  Queue.push_back(O);
5313  } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
5314  if (!LHS) {
5315  LHS = OLHS;
5316  RHS = ORHS;
5317  BytesFound[b] = true;
5318  Mask |= M;
5319  Alt |= A;
5320  } else if ((LHS == ORHS && RHS == OLHS) ||
5321  (RHS == ORHS && LHS == OLHS)) {
5322  BytesFound[b] = true;
5323  Mask |= M;
5324  Alt |= A;
5325  } else {
5326  return Res;
5327  }
5328  } else {
5329  return Res;
5330  }
5331  }
5332  }
5333 
5334  unsigned LastB = 0, BCnt = 0;
5335  for (unsigned i = 0; i < 8; ++i)
5336  if (BytesFound[LastB]) {
5337  ++BCnt;
5338  LastB = i;
5339  }
5340 
5341  if (!LastB || BCnt < 2)
5342  return Res;
5343 
5344  // Because we'll be zero-extending the output anyway if don't have a specific
5345  // value for each input byte (via the Mask), we can 'anyext' the inputs.
5346  if (LHS.getValueType() != VT) {
5347  LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
5348  RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
5349  }
5350 
5351  Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
5352 
5353  bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
5354  if (NonTrivialMask && !Alt) {
5355  // Res = Mask & CMPB
5356  Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
5357  CurDAG->getConstant(Mask, dl, VT));
5358  } else if (Alt) {
5359  // Res = (CMPB & Mask) | (~CMPB & Alt)
5360  // Which, as suggested here:
5361  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
5362  // can be written as:
5363  // Res = Alt ^ ((Alt ^ Mask) & CMPB)
5364  // useful because the (Alt ^ Mask) can be pre-computed.
5365  Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
5366  CurDAG->getConstant(Mask ^ Alt, dl, VT));
5367  Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
5368  CurDAG->getConstant(Alt, dl, VT));
5369  }
5370 
5371  return Res;
5372 }
5373 
5374 // When CR bit registers are enabled, an extension of an i1 variable to a i32
5375 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
5376 // involves constant materialization of a 0 or a 1 or both. If the result of
5377 // the extension is then operated upon by some operator that can be constant
5378 // folded with a constant 0 or 1, and that constant can be materialized using
5379 // only one instruction (like a zero or one), then we should fold in those
5380 // operations with the select.
5381 void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
5382  if (!PPCSubTarget->useCRBits())
5383  return;
5384 
5385  if (N->getOpcode() != ISD::ZERO_EXTEND &&
5386  N->getOpcode() != ISD::SIGN_EXTEND &&
5387  N->getOpcode() != ISD::ANY_EXTEND)
5388  return;
5389 
5390  if (N->getOperand(0).getValueType() != MVT::i1)
5391  return;
5392 
5393  if (!N->hasOneUse())
5394  return;
5395 
5396  SDLoc dl(N);
5397  EVT VT = N->getValueType(0);
5398  SDValue Cond = N->getOperand(0);
5399  SDValue ConstTrue =
5400  CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
5401  SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
5402 
5403  do {
5404  SDNode *User = *N->use_begin();
5405  if (User->getNumOperands() != 2)
5406  break;
5407 
5408  auto TryFold = [this, N, User, dl](SDValue Val) {
5409  SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
5410  SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
5411  SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
5412 
5413  return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
5414  User->getValueType(0),
5415  O0.getNode(), O1.getNode());
5416  };
5417 
5418  // FIXME: When the semantics of the interaction between select and undef
5419  // are clearly defined, it may turn out to be unnecessary to break here.
5420  SDValue TrueRes = TryFold(ConstTrue);
5421  if (!TrueRes || TrueRes.isUndef())
5422  break;
5423  SDValue FalseRes = TryFold(ConstFalse);
5424  if (!FalseRes || FalseRes.isUndef())
5425  break;
5426 
5427  // For us to materialize these using one instruction, we must be able to
5428  // represent them as signed 16-bit integers.
5429  uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
5430  False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
5431  if (!isInt<16>(True) || !isInt<16>(False))
5432  break;
5433 
5434  // We can replace User with a new SELECT node, and try again to see if we
5435  // can fold the select with its user.
5436  Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
5437  N = User;
5438  ConstTrue = TrueRes;
5439  ConstFalse = FalseRes;
5440  } while (N->hasOneUse());
5441 }
5442 
5443 void PPCDAGToDAGISel::PreprocessISelDAG() {
5444  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
5445 
5446  bool MadeChange = false;
5447  while (Position != CurDAG->allnodes_begin()) {
5448  SDNode *N = &*--Position;
5449  if (N->use_empty())
5450  continue;
5451 
5452  SDValue Res;
5453  switch (N->getOpcode()) {
5454  default: break;
5455  case ISD::OR:
5456  Res = combineToCMPB(N);
5457  break;
5458  }
5459 
5460  if (!Res)
5461  foldBoolExts(Res, N);
5462 
5463  if (Res) {
5464  LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
5465  LLVM_DEBUG(N->dump(CurDAG));
5466  LLVM_DEBUG(dbgs() << "\nNew: ");
5467  LLVM_DEBUG(Res.getNode()->dump(CurDAG));
5468  LLVM_DEBUG(dbgs() << "\n");
5469 
5470  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
5471  MadeChange = true;
5472  }
5473  }
5474 
5475  if (MadeChange)
5476  CurDAG->RemoveDeadNodes();
5477 }
5478 
5479 /// PostprocessISelDAG - Perform some late peephole optimizations
5480 /// on the DAG representation.
5481 void PPCDAGToDAGISel::PostprocessISelDAG() {
5482  // Skip peepholes at -O0.
5483  if (TM.getOptLevel() == CodeGenOpt::None)
5484  return;
5485 
5486  PeepholePPC64();
5487  PeepholeCROps();
5488  PeepholePPC64ZExt();
5489 }
5490 
5491 // Check if all users of this node will become isel where the second operand
5492 // is the constant zero. If this is so, and if we can negate the condition,
5493 // then we can flip the true and false operands. This will allow the zero to
5494 // be folded with the isel so that we don't need to materialize a register
5495 // containing zero.
5496 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
5497  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5498  UI != UE; ++UI) {
5499  SDNode *User = *UI;
5500  if (!User->isMachineOpcode())
5501  return false;
5502  if (User->getMachineOpcode() != PPC::SELECT_I4 &&
5503  User->getMachineOpcode() != PPC::SELECT_I8)
5504  return false;
5505 
5506  SDNode *Op2 = User->getOperand(2).getNode();
5507  if (!Op2->isMachineOpcode())
5508  return false;
5509 
5510  if (Op2->getMachineOpcode() != PPC::LI &&
5511  Op2->getMachineOpcode() != PPC::LI8)
5512  return false;
5513 
5515  if (!C)
5516  return false;
5517 
5518  if (!C->isNullValue())
5519  return false;
5520  }
5521 
5522  return true;
5523 }
5524 
5525 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
5526  SmallVector<SDNode *, 4> ToReplace;
5527  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5528  UI != UE; ++UI) {
5529  SDNode *User = *UI;
5530  assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
5531  User->getMachineOpcode() == PPC::SELECT_I8) &&
5532  "Must have all select users");
5533  ToReplace.push_back(User);
5534  }
5535 
5536  for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
5537  UE = ToReplace.end(); UI != UE; ++UI) {
5538  SDNode *User = *UI;
5539  SDNode *ResNode =
5540  CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
5541  User->getValueType(0), User->getOperand(0),
5542  User->getOperand(2),
5543  User->getOperand(1));
5544 
5545  LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
5546  LLVM_DEBUG(User->dump(CurDAG));
5547  LLVM_DEBUG(dbgs() << "\nNew: ");
5548  LLVM_DEBUG(ResNode->dump(CurDAG));
5549  LLVM_DEBUG(dbgs() << "\n");
5550 
5551  ReplaceUses(User, ResNode);
5552  }
5553 }
5554 
5555 void PPCDAGToDAGISel::PeepholeCROps() {
5556  bool IsModified;
5557  do {
5558  IsModified = false;
5559  for (SDNode &Node : CurDAG->allnodes()) {
5560  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
5561  if (!MachineNode || MachineNode->use_empty())
5562  continue;
5563  SDNode *ResNode = MachineNode;
5564 
5565  bool Op1Set = false, Op1Unset = false,
5566  Op1Not = false,
5567  Op2Set = false, Op2Unset = false,
5568  Op2Not = false;
5569 
5570  unsigned Opcode = MachineNode->getMachineOpcode();
5571  switch (Opcode) {
5572  default: break;
5573  case PPC::CRAND:
5574  case PPC::CRNAND:
5575  case PPC::CROR:
5576  case PPC::CRXOR:
5577  case PPC::CRNOR:
5578  case PPC::CREQV:
5579  case PPC::CRANDC:
5580  case PPC::CRORC: {
5581  SDValue Op = MachineNode->getOperand(1);
5582  if (Op.isMachineOpcode()) {
5583  if (Op.getMachineOpcode() == PPC::CRSET)
5584  Op2Set = true;
5585  else if (Op.getMachineOpcode() == PPC::CRUNSET)
5586  Op2Unset = true;
5587  else if (Op.getMachineOpcode() == PPC::CRNOR &&
5588  Op.getOperand(0) == Op.getOperand(1))
5589  Op2Not = true;
5590  }
5592  }
5593  case PPC::BC:
5594  case PPC::BCn:
5595  case PPC::SELECT_I4:
5596  case PPC::SELECT_I8:
5597  case PPC::SELECT_F4:
5598  case PPC::SELECT_F8:
5599  case PPC::SELECT_QFRC:
5600  case PPC::SELECT_QSRC:
5601  case PPC::SELECT_QBRC:
5602  case PPC::SELECT_SPE:
5603  case PPC::SELECT_SPE4:
5604  case PPC::SELECT_VRRC:
5605  case PPC::SELECT_VSFRC:
5606  case PPC::SELECT_VSSRC:
5607  case PPC::SELECT_VSRC: {
5608  SDValue Op = MachineNode->getOperand(0);
5609  if (Op.isMachineOpcode()) {
5610  if (Op.getMachineOpcode() == PPC::CRSET)
5611  Op1Set = true;
5612  else if (Op.getMachineOpcode() == PPC::CRUNSET)
5613  Op1Unset = true;
5614  else if (Op.getMachineOpcode() == PPC::CRNOR &&
5615  Op.getOperand(0) == Op.getOperand(1))
5616  Op1Not = true;
5617  }
5618  }
5619  break;
5620  }
5621 
5622  bool SelectSwap = false;
5623  switch (Opcode) {
5624  default: break;
5625  case PPC::CRAND:
5626  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5627  // x & x = x
5628  ResNode = MachineNode->getOperand(0).getNode();
5629  else if (Op1Set)
5630  // 1 & y = y
5631  ResNode = MachineNode->getOperand(1).getNode();
5632  else if (Op2Set)
5633  // x & 1 = x
5634  ResNode = MachineNode->getOperand(0).getNode();
5635  else if (Op1Unset || Op2Unset)
5636  // x & 0 = 0 & y = 0
5637  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5638  MVT::i1);
5639  else if (Op1Not)
5640  // ~x & y = andc(y, x)
5641  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5642  MVT::i1, MachineNode->getOperand(1),
5643  MachineNode->getOperand(0).
5644  getOperand(0));
5645  else if (Op2Not)
5646  // x & ~y = andc(x, y)
5647  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5648  MVT::i1, MachineNode->getOperand(0),
5649  MachineNode->getOperand(1).
5650  getOperand(0));
5651  else if (AllUsersSelectZero(MachineNode)) {
5652  ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
5653  MVT::i1, MachineNode->getOperand(0),
5654  MachineNode->getOperand(1));
5655  SelectSwap = true;
5656  }
5657  break;
5658  case PPC::CRNAND:
5659  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5660  // nand(x, x) -> nor(x, x)
5661  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5662  MVT::i1, MachineNode->getOperand(0),
5663  MachineNode->getOperand(0));
5664  else if (Op1Set)
5665  // nand(1, y) -> nor(y, y)
5666  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5667  MVT::i1, MachineNode->getOperand(1),
5668  MachineNode->getOperand(1));
5669  else if (Op2Set)
5670  // nand(x, 1) -> nor(x, x)
5671  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5672  MVT::i1, MachineNode->getOperand(0),
5673  MachineNode->getOperand(0));
5674  else if (Op1Unset || Op2Unset)
5675  // nand(x, 0) = nand(0, y) = 1
5676  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5677  MVT::i1);
5678  else if (Op1Not)
5679  // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
5680  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5681  MVT::i1, MachineNode->getOperand(0).
5682  getOperand(0),
5683  MachineNode->getOperand(1));
5684  else if (Op2Not)
5685  // nand(x, ~y) = ~x | y = orc(y, x)
5686  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5687  MVT::i1, MachineNode->getOperand(1).
5688  getOperand(0),
5689  MachineNode->getOperand(0));
5690  else if (AllUsersSelectZero(MachineNode)) {
5691  ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
5692  MVT::i1, MachineNode->getOperand(0),
5693  MachineNode->getOperand(1));
5694  SelectSwap = true;
5695  }
5696  break;
5697  case PPC::CROR:
5698  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5699  // x | x = x
5700  ResNode = MachineNode->getOperand(0).getNode();
5701  else if (Op1Set || Op2Set)
5702  // x | 1 = 1 | y = 1
5703  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5704  MVT::i1);
5705  else if (Op1Unset)
5706  // 0 | y = y
5707  ResNode = MachineNode->getOperand(1).getNode();
5708  else if (Op2Unset)
5709  // x | 0 = x
5710  ResNode = MachineNode->getOperand(0).getNode();
5711  else if (Op1Not)
5712  // ~x | y = orc(y, x)
5713  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5714  MVT::i1, MachineNode->getOperand(1),
5715  MachineNode->getOperand(0).
5716  getOperand(0));
5717  else if (Op2Not)
5718  // x | ~y = orc(x, y)
5719  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5720  MVT::i1, MachineNode->getOperand(0),
5721  MachineNode->getOperand(1).
5722  getOperand(0));
5723  else if (AllUsersSelectZero(MachineNode)) {
5724  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5725  MVT::i1, MachineNode->getOperand(0),
5726  MachineNode->getOperand(1));
5727  SelectSwap = true;
5728  }
5729  break;
5730  case PPC::CRXOR:
5731  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5732  // xor(x, x) = 0
5733  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5734  MVT::i1);
5735  else if (Op1Set)
5736  // xor(1, y) -> nor(y, y)
5737  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5738  MVT::i1, MachineNode->getOperand(1),
5739  MachineNode->getOperand(1));
5740  else if (Op2Set)
5741  // xor(x, 1) -> nor(x, x)
5742  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5743  MVT::i1, MachineNode->getOperand(0),
5744  MachineNode->getOperand(0));
5745  else if (Op1Unset)
5746  // xor(0, y) = y
5747  ResNode = MachineNode->getOperand(1).getNode();
5748  else if (Op2Unset)
5749  // xor(x, 0) = x
5750  ResNode = MachineNode->getOperand(0).getNode();
5751  else if (Op1Not)
5752  // xor(~x, y) = eqv(x, y)
5753  ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
5754  MVT::i1, MachineNode->getOperand(0).
5755  getOperand(0),
5756  MachineNode->getOperand(1));
5757  else if (Op2Not)
5758  // xor(x, ~y) = eqv(x, y)
5759  ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
5760  MVT::i1, MachineNode->getOperand(0),
5761  MachineNode->getOperand(1).
5762  getOperand(0));
5763  else if (AllUsersSelectZero(MachineNode)) {
5764  ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
5765  MVT::i1, MachineNode->getOperand(0),
5766  MachineNode->getOperand(1));
5767  SelectSwap = true;
5768  }
5769  break;
5770  case PPC::CRNOR:
5771  if (Op1Set || Op2Set)
5772  // nor(1, y) -> 0
5773  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5774  MVT::i1);
5775  else if (Op1Unset)
5776  // nor(0, y) = ~y -> nor(y, y)
5777  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5778  MVT::i1, MachineNode->getOperand(1),
5779  MachineNode->getOperand(1));
5780  else if (Op2Unset)
5781  // nor(x, 0) = ~x
5782  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5783  MVT::i1, MachineNode->getOperand(0),
5784  MachineNode->getOperand(0));
5785  else if (Op1Not)
5786  // nor(~x, y) = andc(x, y)
5787  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5788  MVT::i1, MachineNode->getOperand(0).
5789  getOperand(0),
5790  MachineNode->getOperand(1));
5791  else if (Op2Not)
5792  // nor(x, ~y) = andc(y, x)
5793  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5794  MVT::i1, MachineNode->getOperand(1).
5795  getOperand(0),
5796  MachineNode->getOperand(0));
5797  else if (AllUsersSelectZero(MachineNode)) {
5798  ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
5799  MVT::i1, MachineNode->getOperand(0),
5800  MachineNode->getOperand(1));
5801  SelectSwap = true;
5802  }
5803  break;
5804  case PPC::CREQV:
5805  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5806  // eqv(x, x) = 1
5807  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5808  MVT::i1);
5809  else if (Op1Set)
5810  // eqv(1, y) = y
5811  ResNode = MachineNode->getOperand(1).getNode();
5812  else if (Op2Set)
5813  // eqv(x, 1) = x
5814  ResNode = MachineNode->getOperand(0).getNode();
5815  else if (Op1Unset)
5816  // eqv(0, y) = ~y -> nor(y, y)
5817  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5818  MVT::i1, MachineNode->getOperand(1),
5819  MachineNode->getOperand(1));
5820  else if (Op2Unset)
5821  // eqv(x, 0) = ~x
5822  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5823  MVT::i1, MachineNode->getOperand(0),
5824  MachineNode->getOperand(0));
5825  else if (Op1Not)
5826  // eqv(~x, y) = xor(x, y)
5827  ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
5828  MVT::i1, MachineNode->getOperand(0).
5829  getOperand(0),
5830  MachineNode->getOperand(1));
5831  else if (Op2Not)
5832  // eqv(x, ~y) = xor(x, y)
5833  ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
5834  MVT::i1, MachineNode->getOperand(0),
5835  MachineNode->getOperand(1).
5836  getOperand(0));
5837  else if (AllUsersSelectZero(MachineNode)) {
5838  ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
5839  MVT::i1, MachineNode->getOperand(0),
5840  MachineNode->getOperand(1));
5841  SelectSwap = true;
5842  }
5843  break;
5844  case PPC::CRANDC:
5845  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5846  // andc(x, x) = 0
5847  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5848  MVT::i1);
5849  else if (Op1Set)
5850  // andc(1, y) = ~y
5851  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5852  MVT::i1, MachineNode->getOperand(1),
5853  MachineNode->getOperand(1));
5854  else if (Op1Unset || Op2Set)
5855  // andc(0, y) = andc(x, 1) = 0
5856  ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5857  MVT::i1);
5858  else if (Op2Unset)
5859  // andc(x, 0) = x
5860  ResNode = MachineNode->getOperand(0).getNode();
5861  else if (Op1Not)
5862  // andc(~x, y) = ~(x | y) = nor(x, y)
5863  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5864  MVT::i1, MachineNode->getOperand(0).
5865  getOperand(0),
5866  MachineNode->getOperand(1));
5867  else if (Op2Not)
5868  // andc(x, ~y) = x & y
5869  ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
5870  MVT::i1, MachineNode->getOperand(0),
5871  MachineNode->getOperand(1).
5872  getOperand(0));
5873  else if (AllUsersSelectZero(MachineNode)) {
5874  ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5875  MVT::i1, MachineNode->getOperand(1),
5876  MachineNode->getOperand(0));
5877  SelectSwap = true;
5878  }
5879  break;
5880  case PPC::CRORC:
5881  if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5882  // orc(x, x) = 1
5883  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5884  MVT::i1);
5885  else if (Op1Set || Op2Unset)
5886  // orc(1, y) = orc(x, 0) = 1
5887  ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5888  MVT::i1);
5889  else if (Op2Set)
5890  // orc(x, 1) = x
5891  ResNode = MachineNode->getOperand(0).getNode();
5892  else if (Op1Unset)
5893  // orc(0, y) = ~y
5894  ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5895  MVT::i1, MachineNode->getOperand(1),
5896  MachineNode->getOperand(1));
5897  else if (Op1Not)
5898  // orc(~x, y) = ~(x & y) = nand(x, y)
5899  ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
5900  MVT::i1, MachineNode->getOperand(0).
5901  getOperand(0),
5902  MachineNode->getOperand(1));
5903  else if (Op2Not)
5904  // orc(x, ~y) = x | y
5905  ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
5906  MVT::i1, MachineNode->getOperand(0),
5907  MachineNode->getOperand(1).
5908  getOperand(0));
5909  else if (AllUsersSelectZero(MachineNode)) {
5910  ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5911  MVT::i1, MachineNode->getOperand(1),
5912  MachineNode->getOperand(0));
5913  SelectSwap = true;
5914  }
5915  break;
5916  case PPC::SELECT_I4:
5917  case PPC::SELECT_I8:
5918  case PPC::SELECT_F4:
5919  case PPC::SELECT_F8:
5920  case PPC::SELECT_QFRC:
5921  case PPC::SELECT_QSRC:
5922  case PPC::SELECT_QBRC:
5923  case PPC::SELECT_SPE:
5924  case PPC::SELECT_SPE4:
5925  case PPC::SELECT_VRRC:
5926  case PPC::SELECT_VSFRC:
5927  case PPC::SELECT_VSSRC:
5928  case PPC::SELECT_VSRC:
5929  if (Op1Set)
5930  ResNode = MachineNode->getOperand(1).getNode();
5931  else if (Op1Unset)
5932  ResNode = MachineNode->getOperand(2).getNode();
5933  else if (Op1Not)
5934  ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
5935  SDLoc(MachineNode),
5936  MachineNode->getValueType(0),
5937  MachineNode->getOperand(0).
5938  getOperand(0),
5939  MachineNode->getOperand(2),
5940  MachineNode->getOperand(1));
5941  break;
5942  case PPC::BC:
5943  case PPC::BCn:
5944  if (Op1Not)
5945  ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
5946  PPC::BC,
5947  SDLoc(MachineNode),
5948  MVT::Other,
5949  MachineNode->getOperand(0).
5950  getOperand(0),
5951  MachineNode->getOperand(1),
5952  MachineNode->getOperand(2));
5953  // FIXME: Handle Op1Set, Op1Unset here too.
5954  break;
5955  }
5956 
5957  // If we're inverting this node because it is used only by selects that
5958  // we'd like to swap, then swap the selects before the node replacement.
5959  if (SelectSwap)
5960  SwapAllSelectUsers(MachineNode);
5961 
5962  if (ResNode != MachineNode) {
5963  LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
5964  LLVM_DEBUG(MachineNode->dump(CurDAG));
5965  LLVM_DEBUG(dbgs() << "\nNew: ");
5966  LLVM_DEBUG(ResNode->dump(CurDAG));
5967  LLVM_DEBUG(dbgs() << "\n");
5968 
5969  ReplaceUses(MachineNode, ResNode);
5970  IsModified = true;
5971  }
5972  }
5973  if (IsModified)
5974  CurDAG->RemoveDeadNodes();
5975  } while (IsModified);
5976 }
5977 
5978 // Gather the set of 32-bit operations that are known to have their
5979 // higher-order 32 bits zero, where ToPromote contains all such operations.
//
// Op32 is the value to classify. The function returns false when Op32 is not
// a machine opcode or matches none of the opcode cases below. When it returns
// true, Op32's node and every node accepted by the recursive calls on its
// operands have been inserted into ToPromote. Every look-through case
// collects its operands' results in a local set and merges that set into
// ToPromote only on success, so a false return leaves ToPromote untouched.
//
// NOTE(review): listing line 5980, which should hold the start of this
// function's signature, is absent from this extract.
5981  SmallPtrSetImpl<SDNode *> &ToPromote) {
5982  if (!Op32.isMachineOpcode())
5983  return false;
5984 
5985  // First, check for the "frontier" instructions (those that will clear the
5986  // higher-order 32 bits).
5987 
5988  // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
5989  // around. If it does not, then these instructions will clear the
5990  // higher-order bits.
5991  if ((Op32.getMachineOpcode() == PPC::RLWINM ||
5992  Op32.getMachineOpcode() == PPC::RLWNM) &&
5993  Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
5994  ToPromote.insert(Op32.getNode());
5995  return true;
5996  }
5997 
5998  // SLW and SRW always clear the higher-order bits.
5999  if (Op32.getMachineOpcode() == PPC::SLW ||
6000  Op32.getMachineOpcode() == PPC::SRW) {
6001  ToPromote.insert(Op32.getNode());
6002  return true;
6003  }
6004 
6005  // For LI and LIS, we need the immediate to be positive (so that it is not
6006  // sign extended).
6007  if (Op32.getMachineOpcode() == PPC::LI ||
6008  Op32.getMachineOpcode() == PPC::LIS) {
6009  if (!isUInt<15>(Op32.getConstantOperandVal(0)))
6010  return false;
6011 
6012  ToPromote.insert(Op32.getNode());
6013  return true;
6014  }
6015 
6016  // LHBRX and LWBRX always clear the higher-order bits.
6017  if (Op32.getMachineOpcode() == PPC::LHBRX ||
6018  Op32.getMachineOpcode() == PPC::LWBRX) {
6019  ToPromote.insert(Op32.getNode());
6020  return true;
6021  }
6022 
6023  // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
6024  if (Op32.getMachineOpcode() == PPC::CNTLZW ||
6025  Op32.getMachineOpcode() == PPC::CNTTZW) {
6026  ToPromote.insert(Op32.getNode());
6027  return true;
6028  }
6029 
6030  // Next, check for those instructions we can look through.
6031 
6032  // Assuming the mask does not wrap around, then the higher-order bits are
6033  // taken directly from the first operand.
6034  if (Op32.getMachineOpcode() == PPC::RLWIMI &&
6035  Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
6036  SmallPtrSet<SDNode *, 16> ToPromote1;
6037  if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
6038  return false;
6039 
6040  ToPromote.insert(Op32.getNode());
6041  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
6042  return true;
6043  }
6044 
6045  // For OR, the higher-order bits are zero if that is true for both operands.
6046  // For SELECT_I4, the same is true (but the relevant operand numbers are
6047  // shifted by 1).
6048  if (Op32.getMachineOpcode() == PPC::OR ||
6049  Op32.getMachineOpcode() == PPC::SELECT_I4) {
 // B is the index of the first of the two value operands to inspect.
6050  unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
 // Both operands accumulate into the same scratch set; it is merged only
 // if both recursive calls succeed.
6051  SmallPtrSet<SDNode *, 16> ToPromote1;
6052  if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
6053  return false;
6054  if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
6055  return false;
6056 
6057  ToPromote.insert(Op32.getNode());
6058  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
6059  return true;
6060  }
6061 
6062  // For ORI and ORIS, we need the higher-order bits of the first operand to be
6063  // zero, and also for the constant to be positive (so that it is not sign
6064  // extended).
6065  if (Op32.getMachineOpcode() == PPC::ORI ||
6066  Op32.getMachineOpcode() == PPC::ORIS) {
6067  SmallPtrSet<SDNode *, 16> ToPromote1;
6068  if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
6069  return false;
6070  if (!isUInt<15>(Op32.getConstantOperandVal(1)))
6071  return false;
6072 
6073  ToPromote.insert(Op32.getNode());
6074  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
6075  return true;
6076  }
6077 
6078  // The higher-order bits of AND are zero if that is true for at least one of
6079  // the operands.
6080  if (Op32.getMachineOpcode() == PPC::AND) {
 // Each operand gets its own scratch set so that only the sets of the
 // operands that succeeded are merged below.
6081  SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
6082  bool Op0OK =
6083  PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
6084  bool Op1OK =
6085  PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
6086  if (!Op0OK && !Op1OK)
6087  return false;
6088 
6089  ToPromote.insert(Op32.getNode());
6090 
6091  if (Op0OK)
6092  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
6093 
6094  if (Op1OK)
6095  ToPromote.insert(ToPromote2.begin(), ToPromote2.end());
6096 
6097  return true;
6098  }
6099 
6100  // For ANDI and ANDIS, the higher-order bits are zero if either that is true
6101  // of the first operand, or if the second operand is positive (so that it is
6102  // not sign extended).
6103  if (Op32.getMachineOpcode() == PPC::ANDIo ||
6104  Op32.getMachineOpcode() == PPC::ANDISo) {
6105  SmallPtrSet<SDNode *, 16> ToPromote1;
6106  bool Op0OK =
6107  PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
6108  bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
6109  if (!Op0OK && !Op1OK)
6110  return false;
6111 
6112  ToPromote.insert(Op32.getNode());
6113 
 // When only the immediate check passed, just this node is promoted.
6114  if (Op0OK)
6115  ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
6116 
6117  return true;
6118  }
6119 
 // Any other machine opcode is not handled.
6120  return false;
6121 }
6122 
6123 void PPCDAGToDAGISel::PeepholePPC64ZExt() {
 // Peephole over the current DAG: finds
 // RLDICL(INSERT_SUBREG(IMPLICIT_DEF, x, sub_32), 0, 32) nodes and, when
 // PeepholePPC64ZExtGather accepts x and the gathered nodes have no users
 // outside the gathered set, morphs those nodes to their 64-bit opcodes and
 // replaces the RLDICL with x. Does nothing unless the subtarget is PPC64.
6124  if (!PPCSubTarget->isPPC64())
6125  return;
6126 
6127  // When we zero-extend from i32 to i64, we use a pattern like this:
6128  // def : Pat<(i64 (zext i32:$in)),
6129  // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
6130  // 0, 32)>;
6131  // There are several 32-bit shift/rotate instructions, however, that will
6132  // clear the higher-order bits of their output, rendering the RLDICL
6133  // unnecessary. When that happens, we remove it here, and redefine the
6134  // relevant 32-bit operation to be a 64-bit operation.
6135 
 // The node list is walked from its end toward its beginning.
6136  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
6137 
6138  bool MadeChange = false;
6139  while (Position != CurDAG->allnodes_begin()) {
6140  SDNode *N = &*--Position;
6141  // Skip dead nodes and any non-machine opcodes.
6142  if (N->use_empty() || !N->isMachineOpcode())
6143  continue;
6144 
6145  if (N->getMachineOpcode() != PPC::RLDICL)
6146  continue;
6147 
 // Only the (0, 32) form of RLDICL shown in the pattern above is handled.
6148  if (N->getConstantOperandVal(1) != 0 ||
6149  N->getConstantOperandVal(2) != 32)
6150  continue;
6151 
6152  SDValue ISR = N->getOperand(0);
6153  if (!ISR.isMachineOpcode() ||
6154  ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
6155  continue;
6156 
 // The INSERT_SUBREG must feed only this RLDICL.
6157  if (!ISR.hasOneUse())
6158  continue;
6159 
6160  if (ISR.getConstantOperandVal(2) != PPC::sub_32)
6161  continue;
6162 
6163  SDValue IDef = ISR.getOperand(0);
6164  if (!IDef.isMachineOpcode() ||
6165  IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
6166  continue;
6167 
6168  // We now know that we're looking at a canonical i32 -> i64 zext. See if we
6169  // can get rid of it.
6170 
6171  SDValue Op32 = ISR->getOperand(1);
6172  if (!Op32.isMachineOpcode())
6173  continue;
6174 
6175  // There are some 32-bit instructions that always clear the high-order 32
6176  // bits, there are also some instructions (like AND) that we can look
6177  // through.
6178  SmallPtrSet<SDNode *, 16> ToPromote;
6179  if (!PeepholePPC64ZExtGather(Op32, ToPromote))
6180  continue;
6181 
6182  // If the ToPromote set contains nodes that have uses outside of the set
6183  // (except for the original INSERT_SUBREG), then abort the transformation.
6184  bool OutsideUse = false;
6185  for (SDNode *PN : ToPromote) {
6186  for (SDNode *UN : PN->uses()) {
6187  if (!ToPromote.count(UN) && UN != ISR.getNode()) {
6188  OutsideUse = true;
6189  break;
6190  }
6191  }
6192 
6193  if (OutsideUse)
6194  break;
6195  }
6196  if (OutsideUse)
6197  continue;
6198 
6199  MadeChange = true;
6200 
6201  // We now know that this zero extension can be removed by promoting the
6202  // nodes in ToPromote to 64-bit operations, where for operations in the
6203  // frontier of the set, we need to insert INSERT_SUBREGs for their
6204  // operands.
6205  for (SDNode *PN : ToPromote) {
 // Every opcode PeepholePPC64ZExtGather can accept has a case here;
 // anything else hits llvm_unreachable.
6206  unsigned NewOpcode;
6207  switch (PN->getMachineOpcode()) {
6208  default:
6209  llvm_unreachable("Don't know the 64-bit variant of this instruction");
6210  case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;
6211  case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;
6212  case PPC::SLW: NewOpcode = PPC::SLW8; break;
6213  case PPC::SRW: NewOpcode = PPC::SRW8; break;
6214  case PPC::LI: NewOpcode = PPC::LI8; break;
6215  case PPC::LIS: NewOpcode = PPC::LIS8; break;
6216  case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
6217  case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
6218  case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
6219  case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;
6220  case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
6221  case PPC::OR: NewOpcode = PPC::OR8; break;
6222  case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
6223  case PPC::ORI: NewOpcode = PPC::ORI8; break;
6224  case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
6225  case PPC::AND: NewOpcode = PPC::AND8; break;
6226  case PPC::ANDIo: NewOpcode = PPC::ANDIo8; break;
6227  case PPC::ANDISo: NewOpcode = PPC::ANDISo8; break;
6228  }
6229 
6230  // Note: During the replacement process, the nodes will be in an
6231  // inconsistent state (some instructions will have operands with values
6232  // of the wrong type). Once done, however, everything should be right
6233  // again.
6234 
 // NOTE(review): `Ops` is used below but its declaration is not in this
 // extract (listing line 6235 is absent); presumably a small vector of
 // SDValue -- confirm against the upstream file.
 //
 // Build the new operand list: an i32 operand that is neither in
 // ToPromote nor a ConstantSDNode is wrapped in a fresh INSERT_SUBREG
 // that reuses ISR's operands 0 and 2 and ISR's value-type list; every
 // other operand is passed through unchanged.
6236  for (const SDValue &V : PN->ops()) {
6237  if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
6238  !isa<ConstantSDNode>(V)) {
6239  SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
6240  SDNode *ReplOp =
6241  CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
6242  ISR.getNode()->getVTList(), ReplOpOps);
6243  Ops.push_back(SDValue(ReplOp, 0));
6244  } else {
6245  Ops.push_back(V);
6246  }
6247  }
6248 
6249  // Because all to-be-promoted nodes only have users that are other
6250  // promoted nodes (or the original INSERT_SUBREG), we can safely replace
6251  // the i32 result value type with i64.
6252 
 // Result types other than i32 are copied over unchanged.
6253  SmallVector<EVT, 2> NewVTs;
6254  SDVTList VTs = PN->getVTList();
6255  for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
6256  if (VTs.VTs[i] == MVT::i32)
6257  NewVTs.push_back(MVT::i64);
6258  else
6259  NewVTs.push_back(VTs.VTs[i]);
6260 
6261  LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
6262  LLVM_DEBUG(PN->dump(CurDAG));
6263 
 // Morph PN in place to the 64-bit opcode with the new types/operands.
6264  CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
6265 
6266  LLVM_DEBUG(dbgs() << "\nNew: ");
6267  LLVM_DEBUG(PN->dump(CurDAG));
6268  LLVM_DEBUG(dbgs() << "\n");
6269  }
6270 
6271  // Now we replace the original zero extend and its associated INSERT_SUBREG
6272  // with the value feeding the INSERT_SUBREG (which has now been promoted to
6273  // return an i64).
6274 
6275  LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
6276  LLVM_DEBUG(N->dump(CurDAG));
6277  LLVM_DEBUG(dbgs() << "\nNew: ");
6278  LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
6279  LLVM_DEBUG(dbgs() << "\n");
6280 
6281  ReplaceUses(N, Op32.getNode());
6282  }
6283 
 // Dead nodes are swept once, after the walk, and only if something changed.
6284  if (MadeChange)
6285  CurDAG->RemoveDeadNodes();
6286 }
6287 
6288 void PPCDAGToDAGISel::PeepholePPC64() {
 // Peephole over the current DAG: for each load/store listed in the switch
 // below whose displacement operand is a constant and whose base operand is
 // produced by ADDI/ADDI8/ADDIdtprelL/ADDItlsldL/ADDItocL, try to fold the
 // add-immediate's second operand into the memory operation and use the
 // add's first operand as the new base.
6289  // These optimizations are currently supported only for 64-bit SVR4.
6290  if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
6291  return;
6292 
 // The node list is walked from its end toward its beginning.
6293  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
6294 
6295  while (Position != CurDAG->allnodes_begin()) {
6296  SDNode *N = &*--Position;
6297  // Skip dead nodes and any non-machine opcodes.
6298  if (N->use_empty() || !N->isMachineOpcode())
6299  continue;
6300 
 // FirstOp is the index of the displacement operand of N (0 for the loads,
 // 1 for the stores); the base operand is read from index FirstOp + 1.
6301  unsigned FirstOp;
6302  unsigned StorageOpcode = N->getMachineOpcode();
 // Set for the opcodes whose folded offset must be a multiple of 4.
6303  bool RequiresMod4Offset = false;
6304 
6305  switch (StorageOpcode) {
6306  default: continue;
6307 
6308  case PPC::LWA:
6309  case PPC::LD:
6310  case PPC::DFLOADf64:
6311  case PPC::DFLOADf32:
6312  RequiresMod4Offset = true;
 // No break here: control falls through to the shared load handling below
 // (listing line 6313 is absent from this extract).
6314  case PPC::LBZ:
6315  case PPC::LBZ8:
6316  case PPC::LFD:
6317  case PPC::LFS:
6318  case PPC::LHA:
6319  case PPC::LHA8:
6320  case PPC::LHZ:
6321  case PPC::LHZ8:
6322  case PPC::LWZ:
6323  case PPC::LWZ8:
6324  FirstOp = 0;
6325  break;
6326 
6327  case PPC::STD:
6328  case PPC::DFSTOREf64:
6329  case PPC::DFSTOREf32:
6330  RequiresMod4Offset = true;
 // No break here: control falls through to the shared store handling below
 // (listing line 6331 is absent from this extract).
6332  case PPC::STB:
6333  case PPC::STB8:
6334  case PPC::STFD:
6335  case PPC::STFS:
6336  case PPC::STH:
6337  case PPC::STH8:
6338  case PPC::STW:
6339  case PPC::STW8:
6340  FirstOp = 1;
6341  break;
6342  }
6343 
6344  // If this is a load or store with a zero offset, or within the alignment,
6345  // we may be able to fold an add-immediate into the memory operation.
6346  // The check against alignment is below, as it can't occur until we check
6347  // the arguments to N
6348  if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
6349  continue;
6350 
6351  SDValue Base = N->getOperand(FirstOp + 1);
6352  if (!Base.isMachineOpcode())
6353  continue;
6354 
 // Flags holds the relocation target flag to attach to the folded operand.
 // ReplaceFlags stays true for the ADDIdtprelL/ADDItlsldL/ADDItocL bases
 // and is cleared for plain ADDI/ADDI8.
6355  unsigned Flags = 0;
6356  bool ReplaceFlags = true;
6357 
6358  // When the feeding operation is an add-immediate of some sort,
6359  // determine whether we need to add relocation information to the
6360  // target flags on the immediate operand when we fold it into the
6361  // load instruction.
6362  //
6363  // For something like ADDItocL, the relocation information is
6364  // inferred from the opcode; when we process it in the AsmPrinter,
6365  // we add the necessary relocation there. A load, though, can receive
6366  // relocation from various flavors of ADDIxxx, so we need to carry
6367  // the relocation information in the target flags.
6368  switch (Base.getMachineOpcode()) {
6369  default: continue;
6370 
6371  case PPC::ADDI8:
6372  case PPC::ADDI:
6373  // In some cases (such as TLS) the relocation information
6374  // is already in place on the operand, so copying the operand
6375  // is sufficient.
6376  ReplaceFlags = false;
6377  // For these cases, the immediate may not be divisible by 4, in
6378  // which case the fold is illegal for DS-form instructions. (The
6379  // other cases provide aligned addresses and are always safe.)
6380  if (RequiresMod4Offset &&
6381  (!isa<ConstantSDNode>(Base.getOperand(1)) ||
6382  Base.getConstantOperandVal(1) % 4 != 0))
6383  continue;
6384  break;
6385  case PPC::ADDIdtprelL:
6386  Flags = PPCII::MO_DTPREL_LO;
6387  break;
6388  case PPC::ADDItlsldL:
6389  Flags = PPCII::MO_TLSLD_LO;
6390  break;
6391  case PPC::ADDItocL:
6392  Flags = PPCII::MO_TOC_LO;
6393  break;
6394  }
6395 
 // The add-immediate's second operand: the value being folded.
6396  SDValue ImmOpnd = Base.getOperand(1);
6397 
6398  // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
6399  // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
6400  // we might have needed different @ha relocation values for the offset
6401  // pointers).
6402  int MaxDisplacement = 7;
6403  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
6404  const GlobalValue *GV = GA->getGlobal();
 // NOTE(review): if getAlignment() returns 0 this evaluates to -1, so
 // every non-negative offset is treated as out of range below -- confirm
 // that this is intended.
6405  MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement);
6406  }
6407 
6408  bool UpdateHBase = false;
6409  SDValue HBase = Base.getOperand(0);
6410 
 // NOTE(review): getConstantOperandVal returns uint64_t; the value is
 // narrowed to int here.
6411  int Offset = N->getConstantOperandVal(FirstOp);
6412  if (ReplaceFlags) {
6413  if (Offset < 0 || Offset > MaxDisplacement) {
6414  // If we have an addi(toc@l)/addis(toc@ha) pair, and the addis has only
6415  // one use, then we can do this for any offset, we just need to also
6416  // update the offset (i.e. the symbol addend) on the addis.
6417  if (Base.getMachineOpcode() != PPC::ADDItocL)
6418  continue;
6419 
6420  if (!HBase.isMachineOpcode() ||
6421  HBase.getMachineOpcode() != PPC::ADDIStocHA)
6422  continue;
6423 
6424  if (!Base.hasOneUse() || !HBase.hasOneUse())
6425  continue;
6426 
 // The high and low parts must refer to the same operand.
6427  SDValue HImmOpnd = HBase.getOperand(1);
6428  if (HImmOpnd != ImmOpnd)
6429  continue;
6430 
6431  UpdateHBase = true;
6432  }
6433  } else {
6434  // If we're directly folding the addend from an addi instruction, then:
6435  // 1. In general, the offset on the memory access must be zero.
6436  // 2. If the addend is a constant, then it can be combined with a
6437  // non-zero offset, but only if the result meets the encoding
6438  // requirements.
6439  if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
6440  Offset += C->getSExtValue();
6441 
6442  if (RequiresMod4Offset && (Offset % 4) != 0)
6443  continue;
6444 
 // The combined displacement must fit in a signed 16-bit field.
6445  if (!isInt<16>(Offset))
6446  continue;
6447 
6448  ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
6449  ImmOpnd.getValueType());
6450  } else if (Offset != 0) {
6451  continue;
6452  }
6453  }
6454 
6455  // We found an opportunity. Reverse the operands from the add
6456  // immediate and substitute them into the load or store. If
6457  // needed, update the target flags for the immediate operand to
6458  // reflect the necessary relocation information.
6459  LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
6460  LLVM_DEBUG(Base->dump(CurDAG));
6461  LLVM_DEBUG(dbgs() << "\nN: ");
6462  LLVM_DEBUG(N->dump(CurDAG));
6463  LLVM_DEBUG(dbgs() << "\n");
6464 
6465  // If the relocation information isn't already present on the
6466  // immediate operand, add it now.
6467  if (ReplaceFlags) {
6468  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
6469  SDLoc dl(GA);
6470  const GlobalValue *GV = GA->getGlobal();
6471  // We can't perform this optimization for data whose alignment
6472  // is insufficient for the instruction encoding.
6473  if (GV->getAlignment() < 4 &&
6474  (RequiresMod4Offset || (Offset % 4) != 0)) {
6475  LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
6476  continue;
6477  }
 // Rebuild the operand with the memory op's offset and the relocation
 // flag selected above.
6478  ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
6479  } else if (ConstantPoolSDNode *CP =
6480  dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
6481  const Constant *C = CP->getConstVal();
6482  ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
6483  CP->getAlignment(),
6484  Offset, Flags);
6485  }
 // Any other kind of operand is left unchanged.
6486  }
6487 
 // Rewrite N in place: ImmOpnd becomes the displacement and the
 // add-immediate's first operand becomes the base; N's remaining operands
 // are passed through unchanged.
6488  if (FirstOp == 1) // Store
6489  (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
6490  Base.getOperand(0), N->getOperand(3));
6491  else // Load
6492  (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
6493  N->getOperand(2));
6494 
 // Keep the ADDIStocHA's operand in sync with the one now on the memory op.
6495  if (UpdateHBase)
6496  (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
6497  ImmOpnd);
6498 
6499  // The add-immediate may now be dead, in which case remove it.
6500  if (Base.getNode()->use_empty())
6501  CurDAG->RemoveDeadNode(Base.getNode());
6502  }
6503 }
6504 
6505 /// createPPCISelDag - This pass converts a legalized DAG into a
6506 /// PowerPC-specific DAG, ready for instruction scheduling.
6507 ///
/// Returns a PPCDAGToDAGISel allocated with `new` and constructed from TM
/// and OptLevel.
///
/// NOTE(review): listing line 6508, which should hold the start of this
/// function's signature (return type, name and the TM parameter), is absent
/// from this extract.
6509  CodeGenOpt::Level OptLevel) {
6510  return new PPCDAGToDAGISel(TM, OptLevel);
6511 }
uint64_t CallInst * C
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:349
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOffset() const
bool isUndef() const
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
T findLastSet(T Val, ZeroBehavior ZB=ZB_Max)
Get the index of the last set bit starting from the least significant bit.
Definition: MathExtras.h:244
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1563
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
static cl::opt< bool > UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), cl::desc("use aggressive ppc isel for bit permutations"), cl::Hidden)
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:650
This class represents lattice values for constants.
Definition: AllocatorList.h:24
GPRC = address of GLOBAL_OFFSET_TABLE.
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:367
static unsigned index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
const SDValue & getBasePtr() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
static int getRecordFormOpcode(unsigned Opcode)
const SDValue & getValue() const
SDVTList getVTList() const
unsigned Reg
static unsigned selectI64ImmInstrCountDirect(int64_t Imm)
bool hasVSX() const
Definition: PPCSubtarget.h:246
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
static cl::opt< ICmpInGPRType > CmpInGPR("ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All), cl::desc("Specify the types of comparisons to emit GPR-only code for."), cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."), clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."), clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."), clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."), clEnumValN(ICGPR_NonExtIn, "nonextin", "Only comparisons where inputs don't need [sz]ext."), clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."), clEnumValN(ICGPR_ZextI32, "zexti32", "Only i32 comparisons with zext result."), clEnumValN(ICGPR_ZextI64, "zexti64", "Only i64 comparisons with zext result."), clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."), clEnumValN(ICGPR_SextI32, "sexti32", "Only i32 comparisons with sext result."), clEnumValN(ICGPR_SextI64, "sexti64", "Only i64 comparisons with sext result.")))
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:117
const SDValue & getChain() const
bool hasQPX() const
Definition: PPCSubtarget.h:245
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
static cl::opt< bool > BPermRewriterNoMasking("ppc-bit-perm-rewriter-stress-rotates", cl::desc("stress rotate selection in aggressive ppc isel for " "bit permutations"), cl::Hidden)
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:131
bool isBitwiseNot(SDValue V)
Returns true if V is a bitwise not operation.
unsigned second
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:306
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
F(f)
CALL - A direct function call.
static cl::opt< bool > EnableTLSOpt("ppc-tls-opt", cl::init(true), cl::desc("Enable tls optimization peephole"), cl::Hidden)
void setNodeId(int Id)
Set unique node id.
SDNode * getNode() const
get the SDNode which holds the desired result
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm)
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
bool hasSPE() const
Definition: PPCSubtarget.h:243
unsigned int NumVTs
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:435
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:998
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
GlobalBaseReg - On Darwin, this node represents the result of the mflr at function entry...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:136
bool hasOneUse() const
Return true if there is exactly one use of this node.
A description of a memory reference used in the backend.
bool isISA3_0() const
Definition: PPCSubtarget.h:290
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
const HexagonInstrInfo * TII
Shift and rotation operations.
Definition: ISDOpcodes.h:410
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:478
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
uint64_t getConstantOperandVal(unsigned i) const
#define INT64_MAX
Definition: DataTypes.h:77
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
SimpleValueType SimpleTy
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:401
unsigned getID() const
Return the register class ID number.
Position
Position to insert a new instruction relative to an existing instruction.
bool isTargetELF() const
Definition: PPCSubtarget.h:305
ELFYAML::ELF_STO Other
Definition: ELFYAML.cpp:784
This file implements a class to represent arbitrary precision integral constant values and operations...
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
int64_t getSExtValue() const
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:411
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
bool isELFv2ABI() const
#define T
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:959
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:48
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification, or lowering of the constant.
Definition: ISDOpcodes.h:125
unsigned getAlignment() const
Definition: Globals.cpp:97
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:576
ArrayRef< SDUse > ops() const
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:992
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
This class is used to represent ISD::STORE nodes.
TargetInstrInfo - Interface to description of machine instruction set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:636
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
const SDValue & getBasePtr() const
bool isSecurePlt() const
Definition: PPCSubtarget.h:262
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
Machine Value Type.
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static SDNode * selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm)
bool isMachineOpcode() const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:643
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
const SDValue & getOperand(unsigned Num) const
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC)
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
static ManagedStatic< OptionRegistry > OR
Definition: Options.cpp:31
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
const SDValue & getOffset() const
This class provides iterator support for SDUse operands that use a specific SDNode.
unsigned getMachineOpcode() const
const PPCTargetLowering * getTargetLowering() const override
Definition: PPCSubtarget.h:183
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
static bool isInt64Immediate(SDNode *N, uint64_t &Imm)
isInt64Immediate - This method tests to see if the node is a 64-bit constant operand.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
Common code between 32-bit and 64-bit PowerPC targets.
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1226
Extended Value Type.
Definition: ValueTypes.h:34
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const MachineBasicBlock & front() const
bool isLittleEndian() const
Definition: PPCSubtarget.h:228
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
MachineBasicBlock * MBB
MBB - The current block.
bool hasP8Vector() const
Definition: PPCSubtarget.h:247
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1116
bool use_empty() const
Return true if there are no uses of this node.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
void dump() const
Dump this node, for debugging.
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:189
static bool PeepholePPC64ZExtGather(SDValue Op32, SmallPtrSetImpl< SDNode *> &ToPromote)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:404
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:309
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:222
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:182
static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, bool &NeedSwapOps, bool &IsUnCmp)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS model, produces an ADD instruction that ...
Module.h This file contains the declarations for the Module class.
bool hasCMPB() const
Definition: PPCSubtarget.h:256
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:96
An SDNode that represents everything that will be needed to construct a MachineInstr.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
CHAIN = SC CHAIN, Imm128 - System call.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
These values identify relocations on immediates folded into memory operations.
Definition: PPC.h:106
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after execu...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
EVT getMemoryVT() const
Return the type of the in-memory value.
Target - Wrapper for Target specific information.
CodeModel::Model getCodeModel() const
Returns the code model.
iterator_range< use_iterator > uses()
BranchProbabilityInfo * BPI
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:420
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:468
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:471
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2...
int getMaskElt(unsigned Idx) const
static SDNode * selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:618
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:546
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:387
FunctionPass * createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL)
createPPCISelDag - This pass converts a legalized DAG into a PowerPC-specific DAG, ready for instruction scheduling.
iterator begin() const
Definition: SmallPtrSet.h:397
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
bool isPositionIndependent() const
static unsigned selectI64ImmInstrCount(int64_t Imm)
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:614
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
The CMPB instruction (takes two operands of i32 or i64).
static cl::opt< bool > EnableBranchHint("ppc-use-branch-hint", cl::init(true), cl::desc("Enable static hinting of branches on ppc"), cl::Hidden)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
bool useCRBits() const
useCRBits - Return true if we should store and manipulate i1 values in the individual condition regis...
Definition: PPCSubtarget.h:220
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
ICmpInGPRType
unsigned getOpcode() const
SDValue getValue(unsigned R) const
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:346
bool isDarwin() const
isDarwin - True if this is any darwin platform.
Definition: PPCSubtarget.h:301
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:749
iterator end() const
Definition: SmallPtrSet.h:402
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:175
const unsigned Kind
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side...
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:326
static uint64_t Rot64(uint64_t Imm, unsigned R)
static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo, const SDValue &DestMBB)
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
LLVM Value Representation.
Definition: Value.h:73
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
unsigned getResNo() const
get the index which selects a specific result in the SDNode
unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert)
getCRIdxForSetCC - Return the index of the condition register field associated with the SetCC conditi...
PICLevel::Level getPICLevel() const
Returns the PIC level (small or large model)
Definition: Module.cpp:490
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:443
bool isSVR4ABI() const
Definition: PPCSubtarget.h:310
unsigned getNumOperands() const
Conversion operators.
Definition: ISDOpcodes.h:465
const SDValue & getOperand(unsigned i) const
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:474
#define LLVM_DEBUG(X)
Definition: Debug.h:123
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
On a symbol operand "FOO", this indicates that the reference is actually to "FOO@plt".
Definition: PPC.h:79
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate...
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:375
XXPERMDI - The PPC XXPERMDI instruction.
virtual const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const
Returns a TargetRegisterClass used for pointer values.
BRIND - Indirect branch.
Definition: ISDOpcodes.h:634
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate)
void resize(size_type N)
Definition: SmallVector.h:351
This class is used to represent ISD::LOAD nodes.