LLVM 8.0.1
AArch64InstrInfo.cpp
1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the AArch64 implementation of the TargetInstrInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
16 #include "AArch64Subtarget.h"
18 #include "Utils/AArch64BaseInfo.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/CodeGen/StackMaps.h"
34 #include "llvm/IR/DebugLoc.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/MC/MCInst.h"
37 #include "llvm/MC/MCInstrDesc.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/CodeGen.h"
41 #include "llvm/Support/Compiler.h"
46 #include <cassert>
47 #include <cstdint>
48 #include <iterator>
49 #include <utility>
50 
51 using namespace llvm;
52 
53 #define GET_INSTRINFO_CTOR_DTOR
54 #include "AArch64GenInstrInfo.inc"
55 
56 static cl::opt<unsigned> TBZDisplacementBits(
57  "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
58  cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
59 
60 static cl::opt<unsigned> CBZDisplacementBits(
61  "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
62  cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
63 
64 static cl::opt<unsigned>
65  BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
66  cl::desc("Restrict range of Bcc instructions (DEBUG)"));
67 
68 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
69  : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
70  AArch64::CATCHRET),
71  RI(STI.getTargetTriple()), Subtarget(STI) {}
72 
73 /// GetInstSize - Return the number of bytes of code the specified
74 /// instruction may be. This returns the maximum number of bytes.
75 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
76  const MachineBasicBlock &MBB = *MI.getParent();
77  const MachineFunction *MF = MBB.getParent();
78  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
79 
80  if (MI.getOpcode() == AArch64::INLINEASM)
81  return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
82 
83  // FIXME: We currently only handle pseudoinstructions that don't get expanded
84  // before the assembly printer.
85  unsigned NumBytes = 0;
86  const MCInstrDesc &Desc = MI.getDesc();
87  switch (Desc.getOpcode()) {
88  default:
89  // Anything not explicitly designated otherwise is a normal 4-byte insn.
90  NumBytes = 4;
91  break;
92  case TargetOpcode::DBG_VALUE:
94  case TargetOpcode::IMPLICIT_DEF:
95  case TargetOpcode::KILL:
96  NumBytes = 0;
97  break;
98  case TargetOpcode::STACKMAP:
99  // The upper bound for a stackmap intrinsic is the full length of its shadow
100  NumBytes = StackMapOpers(&MI).getNumPatchBytes();
101  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
102  break;
103  case TargetOpcode::PATCHPOINT:
104  // The size of the patchpoint intrinsic is the number of bytes requested
105  NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
106  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
107  break;
108  case AArch64::TLSDESC_CALLSEQ:
109  // This gets lowered to an instruction sequence which takes 16 bytes
110  NumBytes = 16;
111  break;
112  case AArch64::JumpTableDest32:
113  case AArch64::JumpTableDest16:
114  case AArch64::JumpTableDest8:
115  NumBytes = 12;
116  break;
117  case AArch64::SPACE:
118  NumBytes = MI.getOperand(1).getImm();
119  break;
120  }
121 
122  return NumBytes;
123 }
124 
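// parseCondBranch() fills Cond with a target-specific encoding of the branch
// condition:
//   {CondCode}                       for Bcc
//   {-1, Opcode, Reg}                for CB[N]Z
//   {-1, Opcode, Reg, BitNumber}     for TB[N]Z
// reverseBranchCondition(), instantiateCondBranch() and insertSelect() below
// all rely on this layout.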
125 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
126  SmallVectorImpl<MachineOperand> &Cond) {
127  // Block ends with fall-through condbranch.
128  switch (LastInst->getOpcode()) {
129  default:
130  llvm_unreachable("Unknown branch instruction?");
131  case AArch64::Bcc:
132  Target = LastInst->getOperand(1).getMBB();
133  Cond.push_back(LastInst->getOperand(0));
134  break;
135  case AArch64::CBZW:
136  case AArch64::CBZX:
137  case AArch64::CBNZW:
138  case AArch64::CBNZX:
139  Target = LastInst->getOperand(1).getMBB();
140  Cond.push_back(MachineOperand::CreateImm(-1));
141  Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
142  Cond.push_back(LastInst->getOperand(0));
143  break;
144  case AArch64::TBZW:
145  case AArch64::TBZX:
146  case AArch64::TBNZW:
147  case AArch64::TBNZX:
148  Target = LastInst->getOperand(2).getMBB();
149  Cond.push_back(MachineOperand::CreateImm(-1));
150  Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
151  Cond.push_back(LastInst->getOperand(0));
152  Cond.push_back(LastInst->getOperand(1));
153  }
154 }
155 
156 static unsigned getBranchDisplacementBits(unsigned Opc) {
157  switch (Opc) {
158  default:
159  llvm_unreachable("unexpected opcode!");
160  case AArch64::B:
161  return 64;
162  case AArch64::TBNZW:
163  case AArch64::TBZW:
164  case AArch64::TBNZX:
165  case AArch64::TBZX:
166  return TBZDisplacementBits;
167  case AArch64::CBNZW:
168  case AArch64::CBZW:
169  case AArch64::CBNZX:
170  case AArch64::CBZX:
171  return CBZDisplacementBits;
172  case AArch64::Bcc:
173  return BCCDisplacementBits;
174  }
175 }
176 
177 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
178  int64_t BrOffset) const {
179  unsigned Bits = getBranchDisplacementBits(BranchOp);
180  assert(Bits >= 3 && "max branch displacement must be enough to jump "
181  "over conditional branch expansion");
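 // Branch offsets are encoded as a number of 4-byte instructions, so e.g. the
 // 19-bit Bcc immediate gives a range of +/-1MiB from the branch.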
182  return isIntN(Bits, BrOffset / 4);
183 }
184 
185 MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
186  const MachineInstr &MI) const {
187  switch (MI.getOpcode()) {
188  default:
189  llvm_unreachable("unexpected opcode!");
190  case AArch64::B:
191  return MI.getOperand(0).getMBB();
192  case AArch64::TBZW:
193  case AArch64::TBNZW:
194  case AArch64::TBZX:
195  case AArch64::TBNZX:
196  return MI.getOperand(2).getMBB();
197  case AArch64::CBZW:
198  case AArch64::CBNZW:
199  case AArch64::CBZX:
200  case AArch64::CBNZX:
201  case AArch64::Bcc:
202  return MI.getOperand(1).getMBB();
203  }
204 }
205 
206 // Branch analysis.
207 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
208  MachineBasicBlock *&TBB,
209  MachineBasicBlock *&FBB,
210  SmallVectorImpl<MachineOperand> &Cond,
211  bool AllowModify) const {
212  // If the block has no terminators, it just falls into the block after it.
213  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
214  if (I == MBB.end())
215  return false;
216 
217  if (!isUnpredicatedTerminator(*I))
218  return false;
219 
220  // Get the last instruction in the block.
221  MachineInstr *LastInst = &*I;
222 
223  // If there is only one terminator instruction, process it.
224  unsigned LastOpc = LastInst->getOpcode();
225  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
226  if (isUncondBranchOpcode(LastOpc)) {
227  TBB = LastInst->getOperand(0).getMBB();
228  return false;
229  }
230  if (isCondBranchOpcode(LastOpc)) {
231  // Block ends with fall-through condbranch.
232  parseCondBranch(LastInst, TBB, Cond);
233  return false;
234  }
235  return true; // Can't handle indirect branch.
236  }
237 
238  // Get the instruction before it if it is a terminator.
239  MachineInstr *SecondLastInst = &*I;
240  unsigned SecondLastOpc = SecondLastInst->getOpcode();
241 
242  // If AllowModify is true and the block ends with two or more unconditional
243  // branches, delete all but the first unconditional branch.
244  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
245  while (isUncondBranchOpcode(SecondLastOpc)) {
246  LastInst->eraseFromParent();
247  LastInst = SecondLastInst;
248  LastOpc = LastInst->getOpcode();
249  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
250  // Return now; the only terminator is an unconditional branch.
251  TBB = LastInst->getOperand(0).getMBB();
252  return false;
253  } else {
254  SecondLastInst = &*I;
255  SecondLastOpc = SecondLastInst->getOpcode();
256  }
257  }
258  }
259 
260  // If there are three terminators, we don't know what sort of block this is.
261  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
262  return true;
263 
264  // If the block ends with a B and a Bcc, handle it.
265  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
266  parseCondBranch(SecondLastInst, TBB, Cond);
267  FBB = LastInst->getOperand(0).getMBB();
268  return false;
269  }
270 
271  // If the block ends with two unconditional branches, handle it. The second
272  // one is not executed, so remove it.
273  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
274  TBB = SecondLastInst->getOperand(0).getMBB();
275  I = LastInst;
276  if (AllowModify)
277  I->eraseFromParent();
278  return false;
279  }
280 
281  // ...likewise if it ends with an indirect branch followed by an unconditional
282  // branch.
283  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
284  I = LastInst;
285  if (AllowModify)
286  I->eraseFromParent();
287  return true;
288  }
289 
290  // Otherwise, can't handle this.
291  return true;
292 }
293 
294 bool AArch64InstrInfo::reverseBranchCondition(
295  SmallVectorImpl<MachineOperand> &Cond) const {
296  if (Cond[0].getImm() != -1) {
297  // Regular Bcc
298  AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
299  Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
300  } else {
301  // Folded compare-and-branch
302  switch (Cond[1].getImm()) {
303  default:
304  llvm_unreachable("Unknown conditional branch!");
305  case AArch64::CBZW:
306  Cond[1].setImm(AArch64::CBNZW);
307  break;
308  case AArch64::CBNZW:
309  Cond[1].setImm(AArch64::CBZW);
310  break;
311  case AArch64::CBZX:
312  Cond[1].setImm(AArch64::CBNZX);
313  break;
314  case AArch64::CBNZX:
315  Cond[1].setImm(AArch64::CBZX);
316  break;
317  case AArch64::TBZW:
318  Cond[1].setImm(AArch64::TBNZW);
319  break;
320  case AArch64::TBNZW:
321  Cond[1].setImm(AArch64::TBZW);
322  break;
323  case AArch64::TBZX:
324  Cond[1].setImm(AArch64::TBNZX);
325  break;
326  case AArch64::TBNZX:
327  Cond[1].setImm(AArch64::TBZX);
328  break;
329  }
330  }
331 
332  return false;
333 }
334 
335 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
336  int *BytesRemoved) const {
337  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
338  if (I == MBB.end())
339  return 0;
340 
341  if (!isUncondBranchOpcode(I->getOpcode()) &&
342  !isCondBranchOpcode(I->getOpcode()))
343  return 0;
344 
345  // Remove the branch.
346  I->eraseFromParent();
347 
348  I = MBB.end();
349 
350  if (I == MBB.begin()) {
351  if (BytesRemoved)
352  *BytesRemoved = 4;
353  return 1;
354  }
355  --I;
356  if (!isCondBranchOpcode(I->getOpcode())) {
357  if (BytesRemoved)
358  *BytesRemoved = 4;
359  return 1;
360  }
361 
362  // Remove the branch.
363  I->eraseFromParent();
364  if (BytesRemoved)
365  *BytesRemoved = 8;
366 
367  return 2;
368 }
369 
370 void AArch64InstrInfo::instantiateCondBranch(
371  MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
372  ArrayRef<MachineOperand> Cond) const {
373  if (Cond[0].getImm() != -1) {
374  // Regular Bcc
375  BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
376  } else {
377  // Folded compare-and-branch
378  // Note that we use .add() instead of addReg() to keep the register flags.
379  const MachineInstrBuilder MIB =
380  BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
381  if (Cond.size() > 3)
382  MIB.addImm(Cond[3].getImm());
383  MIB.addMBB(TBB);
384  }
385 }
386 
387 unsigned AArch64InstrInfo::insertBranch(
388  MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
389  ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
390  // Shouldn't be a fall through.
391  assert(TBB && "insertBranch must not be told to insert a fallthrough");
392 
393  if (!FBB) {
394  if (Cond.empty()) // Unconditional branch?
395  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
396  else
397  instantiateCondBranch(MBB, DL, TBB, Cond);
398 
399  if (BytesAdded)
400  *BytesAdded = 4;
401 
402  return 1;
403  }
404 
405  // Two-way conditional branch.
406  instantiateCondBranch(MBB, DL, TBB, Cond);
407  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
408 
409  if (BytesAdded)
410  *BytesAdded = 8;
411 
412  return 2;
413 }
414 
415 // Find the original register that VReg is copied from.
416 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
417  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
418  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
419  if (!DefMI->isFullCopy())
420  return VReg;
421  VReg = DefMI->getOperand(1).getReg();
422  }
423  return VReg;
424 }
425 
426 // Determine if VReg is defined by an instruction that can be folded into a
427 // csel instruction. If so, return the folded opcode, and the replacement
428 // register.
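// For example (32-bit forms; the 64-bit forms are analogous):
//   %x = ADDWri %a, 1, 0 (%x = %a + 1)  folds to CSINCWr selecting %a,
//   %x = ORNWrr wzr, %a  (%x = ~%a)     folds to CSINVWr selecting %a,
//   %x = SUBWrr wzr, %a  (%x = -%a)     folds to CSNEGWr selecting %a.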
429 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
430  unsigned *NewVReg = nullptr) {
431  VReg = removeCopies(MRI, VReg);
432  if (!TargetRegisterInfo::isVirtualRegister(VReg))
433  return 0;
434 
435  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
436  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
437  unsigned Opc = 0;
438  unsigned SrcOpNum = 0;
439  switch (DefMI->getOpcode()) {
440  case AArch64::ADDSXri:
441  case AArch64::ADDSWri:
442  // if NZCV is used, do not fold.
443  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
444  return 0;
445  // fall-through to ADDXri and ADDWri.
446  LLVM_FALLTHROUGH;
447  case AArch64::ADDXri:
448  case AArch64::ADDWri:
449  // add x, 1 -> csinc.
450  if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
451  DefMI->getOperand(3).getImm() != 0)
452  return 0;
453  SrcOpNum = 1;
454  Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
455  break;
456 
457  case AArch64::ORNXrr:
458  case AArch64::ORNWrr: {
459  // not x -> csinv, represented as orn dst, xzr, src.
460  unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
461  if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
462  return 0;
463  SrcOpNum = 2;
464  Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
465  break;
466  }
467 
468  case AArch64::SUBSXrr:
469  case AArch64::SUBSWrr:
470  // if NZCV is used, do not fold.
471  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
472  return 0;
473  // fall-through to SUBXrr and SUBWrr.
474  LLVM_FALLTHROUGH;
475  case AArch64::SUBXrr:
476  case AArch64::SUBWrr: {
477  // neg x -> csneg, represented as sub dst, xzr, src.
478  unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
479  if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
480  return 0;
481  SrcOpNum = 2;
482  Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
483  break;
484  }
485  default:
486  return 0;
487  }
488  assert(Opc && SrcOpNum && "Missing parameters");
489 
490  if (NewVReg)
491  *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
492  return Opc;
493 }
494 
495 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
496  ArrayRef<MachineOperand> Cond,
497  unsigned TrueReg, unsigned FalseReg,
498  int &CondCycles, int &TrueCycles,
499  int &FalseCycles) const {
500  // Check register classes.
501  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
502  const TargetRegisterClass *RC =
503  RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
504  if (!RC)
505  return false;
506 
507  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
508  unsigned ExtraCondLat = Cond.size() != 1;
509 
510  // GPRs are handled by csel.
511  // FIXME: Fold in x+1, -x, and ~x when applicable.
512  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
513  AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
514  // Single-cycle csel, csinc, csinv, and csneg.
515  CondCycles = 1 + ExtraCondLat;
516  TrueCycles = FalseCycles = 1;
517  if (canFoldIntoCSel(MRI, TrueReg))
518  TrueCycles = 0;
519  else if (canFoldIntoCSel(MRI, FalseReg))
520  FalseCycles = 0;
521  return true;
522  }
523 
524  // Scalar floating point is handled by fcsel.
525  // FIXME: Form fabs, fmin, and fmax when applicable.
526  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
527  AArch64::FPR32RegClass.hasSubClassEq(RC)) {
528  CondCycles = 5 + ExtraCondLat;
529  TrueCycles = FalseCycles = 2;
530  return true;
531  }
532 
533  // Can't do vectors.
534  return false;
535 }
536 
537 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
538  MachineBasicBlock::iterator I,
539  const DebugLoc &DL, unsigned DstReg,
540  ArrayRef<MachineOperand> Cond,
541  unsigned TrueReg, unsigned FalseReg) const {
542  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
543 
544  // Parse the condition code, see parseCondBranch() above.
545  AArch64CC::CondCode CC;
546  switch (Cond.size()) {
547  default:
548  llvm_unreachable("Unknown condition opcode in Cond");
549  case 1: // b.cc
550  CC = AArch64CC::CondCode(Cond[0].getImm());
551  break;
552  case 3: { // cbz/cbnz
553  // We must insert a compare against 0.
554  bool Is64Bit;
555  switch (Cond[1].getImm()) {
556  default:
557  llvm_unreachable("Unknown branch opcode in Cond");
558  case AArch64::CBZW:
559  Is64Bit = false;
560  CC = AArch64CC::EQ;
561  break;
562  case AArch64::CBZX:
563  Is64Bit = true;
564  CC = AArch64CC::EQ;
565  break;
566  case AArch64::CBNZW:
567  Is64Bit = false;
568  CC = AArch64CC::NE;
569  break;
570  case AArch64::CBNZX:
571  Is64Bit = true;
572  CC = AArch64CC::NE;
573  break;
574  }
575  unsigned SrcReg = Cond[2].getReg();
576  if (Is64Bit) {
577  // cmp reg, #0 is actually subs xzr, reg, #0.
578  MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
579  BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
580  .addReg(SrcReg)
581  .addImm(0)
582  .addImm(0);
583  } else {
584  MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
585  BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
586  .addReg(SrcReg)
587  .addImm(0)
588  .addImm(0);
589  }
590  break;
591  }
592  case 4: { // tbz/tbnz
593  // We must insert a tst instruction.
594  switch (Cond[1].getImm()) {
595  default:
596  llvm_unreachable("Unknown branch opcode in Cond");
597  case AArch64::TBZW:
598  case AArch64::TBZX:
599  CC = AArch64CC::EQ;
600  break;
601  case AArch64::TBNZW:
602  case AArch64::TBNZX:
603  CC = AArch64CC::NE;
604  break;
605  }
606  // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
607  if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
608  BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
609  .addReg(Cond[2].getReg())
610  .addImm(
611  AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
612  else
613  BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
614  .addReg(Cond[2].getReg())
615  .addImm(
616  AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
617  break;
618  }
619  }
620 
621  unsigned Opc = 0;
622  const TargetRegisterClass *RC = nullptr;
623  bool TryFold = false;
624  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
625  RC = &AArch64::GPR64RegClass;
626  Opc = AArch64::CSELXr;
627  TryFold = true;
628  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
629  RC = &AArch64::GPR32RegClass;
630  Opc = AArch64::CSELWr;
631  TryFold = true;
632  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
633  RC = &AArch64::FPR64RegClass;
634  Opc = AArch64::FCSELDrrr;
635  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
636  RC = &AArch64::FPR32RegClass;
637  Opc = AArch64::FCSELSrrr;
638  }
639  assert(RC && "Unsupported regclass");
640 
641  // Try folding simple instructions into the csel.
642  if (TryFold) {
643  unsigned NewVReg = 0;
644  unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
645  if (FoldedOpc) {
646  // The folded opcodes csinc, csinv and csneg apply the operation to
647  // FalseReg, so we need to invert the condition.
648  CC = AArch64CC::getInvertedCondCode(CC);
649  TrueReg = FalseReg;
650  } else
651  FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
652 
653  // Fold the operation. Leave any dead instructions for DCE to clean up.
654  if (FoldedOpc) {
655  FalseReg = NewVReg;
656  Opc = FoldedOpc;
657  // This extends the live range of NewVReg.
658  MRI.clearKillFlags(NewVReg);
659  }
660  }
661 
662  // Pull all virtual registers into the appropriate class.
663  MRI.constrainRegClass(TrueReg, RC);
664  MRI.constrainRegClass(FalseReg, RC);
665 
666  // Insert the csel.
667  BuildMI(MBB, I, DL, get(Opc), DstReg)
668  .addReg(TrueReg)
669  .addReg(FalseReg)
670  .addImm(CC);
671 }
672 
673 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
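/// For example, MOVi32imm #0xffff can be expanded to an ORRWri because 0xffff
/// is a valid logical immediate, whereas #0x12345678 is not encodable and
/// still needs a MOVZ/MOVK sequence.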
674 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
675  uint64_t Imm = MI.getOperand(1).getImm();
676  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
677  uint64_t Encoding;
678  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
679 }
680 
681 // FIXME: this implementation should be micro-architecture dependent, so a
682 // micro-architecture target hook should be introduced here in future.
683 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
684  if (!Subtarget.hasCustomCheapAsMoveHandling())
685  return MI.isAsCheapAsAMove();
686 
687  const unsigned Opcode = MI.getOpcode();
688 
689  // Firstly, check cases gated by features.
690 
691  if (Subtarget.hasZeroCycleZeroingFP()) {
692  if (Opcode == AArch64::FMOVH0 ||
693  Opcode == AArch64::FMOVS0 ||
694  Opcode == AArch64::FMOVD0)
695  return true;
696  }
697 
698  if (Subtarget.hasZeroCycleZeroingGP()) {
699  if (Opcode == TargetOpcode::COPY &&
700  (MI.getOperand(1).getReg() == AArch64::WZR ||
701  MI.getOperand(1).getReg() == AArch64::XZR))
702  return true;
703  }
704 
705  // Secondly, check cases specific to sub-targets.
706 
707  if (Subtarget.hasExynosCheapAsMoveHandling()) {
708  if (isExynosCheapAsMove(MI))
709  return true;
710 
711  return MI.isAsCheapAsAMove();
712  }
713 
714  // Finally, check generic cases.
715 
716  switch (Opcode) {
717  default:
718  return false;
719 
720  // add/sub on register without shift
721  case AArch64::ADDWri:
722  case AArch64::ADDXri:
723  case AArch64::SUBWri:
724  case AArch64::SUBXri:
725  return (MI.getOperand(3).getImm() == 0);
726 
727  // logical ops on immediate
728  case AArch64::ANDWri:
729  case AArch64::ANDXri:
730  case AArch64::EORWri:
731  case AArch64::EORXri:
732  case AArch64::ORRWri:
733  case AArch64::ORRXri:
734  return true;
735 
736  // logical ops on register without shift
737  case AArch64::ANDWrr:
738  case AArch64::ANDXrr:
739  case AArch64::BICWrr:
740  case AArch64::BICXrr:
741  case AArch64::EONWrr:
742  case AArch64::EONXrr:
743  case AArch64::EORWrr:
744  case AArch64::EORXrr:
745  case AArch64::ORNWrr:
746  case AArch64::ORNXrr:
747  case AArch64::ORRWrr:
748  case AArch64::ORRXrr:
749  return true;
750 
751  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
752  // ORRXri, it is as cheap as MOV
753  case AArch64::MOVi32imm:
754  return canBeExpandedToORR(MI, 32);
755  case AArch64::MOVi64imm:
756  return canBeExpandedToORR(MI, 64);
757  }
758 
759  llvm_unreachable("Unknown opcode to check as cheap as a move!");
760 }
761 
762 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
763  switch (MI.getOpcode()) {
764  default:
765  return false;
766 
767  case AArch64::ADDWrs:
768  case AArch64::ADDXrs:
769  case AArch64::ADDSWrs:
770  case AArch64::ADDSXrs: {
771  unsigned Imm = MI.getOperand(3).getImm();
772  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
773  if (ShiftVal == 0)
774  return true;
775  return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
776  }
777 
778  case AArch64::ADDWrx:
779  case AArch64::ADDXrx:
780  case AArch64::ADDXrx64:
781  case AArch64::ADDSWrx:
782  case AArch64::ADDSXrx:
783  case AArch64::ADDSXrx64: {
784  unsigned Imm = MI.getOperand(3).getImm();
785  switch (AArch64_AM::getArithExtendType(Imm)) {
786  default:
787  return false;
788  case AArch64_AM::UXTB:
789  case AArch64_AM::UXTH:
790  case AArch64_AM::UXTW:
791  case AArch64_AM::UXTX:
792  return AArch64_AM::getArithShiftValue(Imm) <= 4;
793  }
794  }
795 
796  case AArch64::SUBWrs:
797  case AArch64::SUBSWrs: {
798  unsigned Imm = MI.getOperand(3).getImm();
799  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
800  return ShiftVal == 0 ||
801  (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
802  }
803 
804  case AArch64::SUBXrs:
805  case AArch64::SUBSXrs: {
806  unsigned Imm = MI.getOperand(3).getImm();
807  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
808  return ShiftVal == 0 ||
809  (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
810  }
811 
812  case AArch64::SUBWrx:
813  case AArch64::SUBXrx:
814  case AArch64::SUBXrx64:
815  case AArch64::SUBSWrx:
816  case AArch64::SUBSXrx:
817  case AArch64::SUBSXrx64: {
818  unsigned Imm = MI.getOperand(3).getImm();
819  switch (AArch64_AM::getArithExtendType(Imm)) {
820  default:
821  return false;
822  case AArch64_AM::UXTB:
823  case AArch64_AM::UXTH:
824  case AArch64_AM::UXTW:
825  case AArch64_AM::UXTX:
826  return AArch64_AM::getArithShiftValue(Imm) == 0;
827  }
828  }
829 
830  case AArch64::LDRBBroW:
831  case AArch64::LDRBBroX:
832  case AArch64::LDRBroW:
833  case AArch64::LDRBroX:
834  case AArch64::LDRDroW:
835  case AArch64::LDRDroX:
836  case AArch64::LDRHHroW:
837  case AArch64::LDRHHroX:
838  case AArch64::LDRHroW:
839  case AArch64::LDRHroX:
840  case AArch64::LDRQroW:
841  case AArch64::LDRQroX:
842  case AArch64::LDRSBWroW:
843  case AArch64::LDRSBWroX:
844  case AArch64::LDRSBXroW:
845  case AArch64::LDRSBXroX:
846  case AArch64::LDRSHWroW:
847  case AArch64::LDRSHWroX:
848  case AArch64::LDRSHXroW:
849  case AArch64::LDRSHXroX:
850  case AArch64::LDRSWroW:
851  case AArch64::LDRSWroX:
852  case AArch64::LDRSroW:
853  case AArch64::LDRSroX:
854  case AArch64::LDRWroW:
855  case AArch64::LDRWroX:
856  case AArch64::LDRXroW:
857  case AArch64::LDRXroX:
858  case AArch64::PRFMroW:
859  case AArch64::PRFMroX:
860  case AArch64::STRBBroW:
861  case AArch64::STRBBroX:
862  case AArch64::STRBroW:
863  case AArch64::STRBroX:
864  case AArch64::STRDroW:
865  case AArch64::STRDroX:
866  case AArch64::STRHHroW:
867  case AArch64::STRHHroX:
868  case AArch64::STRHroW:
869  case AArch64::STRHroX:
870  case AArch64::STRQroW:
871  case AArch64::STRQroX:
872  case AArch64::STRSroW:
873  case AArch64::STRSroX:
874  case AArch64::STRWroW:
875  case AArch64::STRWroX:
876  case AArch64::STRXroW:
877  case AArch64::STRXroX: {
878  unsigned IsSigned = MI.getOperand(3).getImm();
879  return !IsSigned;
880  }
881  }
882 }
883 
884 bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
885  unsigned Opc = MI.getOpcode();
886  switch (Opc) {
887  default:
888  return false;
889  case AArch64::SEH_StackAlloc:
890  case AArch64::SEH_SaveFPLR:
891  case AArch64::SEH_SaveFPLR_X:
892  case AArch64::SEH_SaveReg:
893  case AArch64::SEH_SaveReg_X:
894  case AArch64::SEH_SaveRegP:
895  case AArch64::SEH_SaveRegP_X:
896  case AArch64::SEH_SaveFReg:
897  case AArch64::SEH_SaveFReg_X:
898  case AArch64::SEH_SaveFRegP:
899  case AArch64::SEH_SaveFRegP_X:
900  case AArch64::SEH_SetFP:
901  case AArch64::SEH_AddFP:
902  case AArch64::SEH_Nop:
903  case AArch64::SEH_PrologEnd:
904  case AArch64::SEH_EpilogStart:
905  case AArch64::SEH_EpilogEnd:
906  return true;
907  }
908 }
909 
910 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
911  unsigned &SrcReg, unsigned &DstReg,
912  unsigned &SubIdx) const {
913  switch (MI.getOpcode()) {
914  default:
915  return false;
916  case AArch64::SBFMXri: // aka sxtw
917  case AArch64::UBFMXri: // aka uxtw
918  // Check for the 32 -> 64 bit extension case, these instructions can do
919  // much more.
920  if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
921  return false;
922  // This is a signed or unsigned 32 -> 64 bit extension.
923  SrcReg = MI.getOperand(1).getReg();
924  DstReg = MI.getOperand(0).getReg();
925  SubIdx = AArch64::sub_32;
926  return true;
927  }
928 }
929 
930 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
931  MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
932  const TargetRegisterInfo *TRI = &getRegisterInfo();
933  MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
934  int64_t OffsetA = 0, OffsetB = 0;
935  unsigned WidthA = 0, WidthB = 0;
936 
937  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
938  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
939 
940  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
941  MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
942  return false;
943 
944  // Retrieve the base, offset from the base and width. Width
945  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
946  // the bases are identical, and the offset of a lower memory access +
947  // the width doesn't overlap the offset of a higher memory access,
948  // then the memory accesses are different.
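 // For example, "ldr x1, [x0]" (offset 0, width 8) and "str w2, [x0, #8]"
 // (offset 8, width 4) share the same base but cannot overlap: 0 + 8 <= 8.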
949  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
950  getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
951  if (BaseOpA->isIdenticalTo(*BaseOpB)) {
952  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
953  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
954  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
955  if (LowOffset + LowWidth <= HighOffset)
956  return true;
957  }
958  }
959  return false;
960 }
961 
962 bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
963  const MachineBasicBlock *MBB,
964  const MachineFunction &MF) const {
965  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
966  return true;
967  switch (MI.getOpcode()) {
968  case AArch64::HINT:
969  // CSDB hints are scheduling barriers.
970  if (MI.getOperand(0).getImm() == 0x14)
971  return true;
972  break;
973  case AArch64::DSB:
974  case AArch64::ISB:
975  // DSB and ISB also are scheduling barriers.
976  return true;
977  default:;
978  }
979  return isSEHInstruction(MI);
980 }
981 
982 /// analyzeCompare - For a comparison instruction, return the source registers
983 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
984 /// Return true if the comparison instruction can be analyzed.
985 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
986  unsigned &SrcReg2, int &CmpMask,
987  int &CmpValue) const {
988  // The first operand can be a frame index where we'd normally expect a
989  // register.
990  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
991  if (!MI.getOperand(1).isReg())
992  return false;
993 
994  switch (MI.getOpcode()) {
995  default:
996  break;
997  case AArch64::SUBSWrr:
998  case AArch64::SUBSWrs:
999  case AArch64::SUBSWrx:
1000  case AArch64::SUBSXrr:
1001  case AArch64::SUBSXrs:
1002  case AArch64::SUBSXrx:
1003  case AArch64::ADDSWrr:
1004  case AArch64::ADDSWrs:
1005  case AArch64::ADDSWrx:
1006  case AArch64::ADDSXrr:
1007  case AArch64::ADDSXrs:
1008  case AArch64::ADDSXrx:
1009  // Replace SUBSWrr with SUBWrr if NZCV is not used.
1010  SrcReg = MI.getOperand(1).getReg();
1011  SrcReg2 = MI.getOperand(2).getReg();
1012  CmpMask = ~0;
1013  CmpValue = 0;
1014  return true;
1015  case AArch64::SUBSWri:
1016  case AArch64::ADDSWri:
1017  case AArch64::SUBSXri:
1018  case AArch64::ADDSXri:
1019  SrcReg = MI.getOperand(1).getReg();
1020  SrcReg2 = 0;
1021  CmpMask = ~0;
1022  // FIXME: In order to convert CmpValue to 0 or 1
1023  CmpValue = MI.getOperand(2).getImm() != 0;
1024  return true;
1025  case AArch64::ANDSWri:
1026  case AArch64::ANDSXri:
1027  // ANDS does not use the same encoding scheme as the others xxxS
1028  // instructions.
1029  SrcReg = MI.getOperand(1).getReg();
1030  SrcReg2 = 0;
1031  CmpMask = ~0;
1032  // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
1033  // while the type of CmpValue is int. When converting uint64_t to int,
1034  // the high 32 bits of uint64_t will be lost.
1035  // In fact it causes a bug in spec2006-483.xalancbmk
1036  // CmpValue is only used to compare with zero in OptimizeCompareInstr
1037  CmpValue = AArch64_AM::decodeLogicalImmediate(
1038  MI.getOperand(2).getImm(),
1039  MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
1040  return true;
1041  }
1042 
1043  return false;
1044 }
1045 
1046 static bool UpdateOperandRegClass(MachineInstr &Instr) {
1047  MachineBasicBlock *MBB = Instr.getParent();
1048  assert(MBB && "Can't get MachineBasicBlock here");
1049  MachineFunction *MF = MBB->getParent();
1050  assert(MF && "Can't get MachineFunction here");
1051  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1052  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1053  MachineRegisterInfo *MRI = &MF->getRegInfo();
1054 
1055  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1056  ++OpIdx) {
1057  MachineOperand &MO = Instr.getOperand(OpIdx);
1058  const TargetRegisterClass *OpRegCstraints =
1059  Instr.getRegClassConstraint(OpIdx, TII, TRI);
1060 
1061  // If there's no constraint, there's nothing to do.
1062  if (!OpRegCstraints)
1063  continue;
1064  // If the operand is a frame index, there's nothing to do here.
1065  // A frame index operand will resolve correctly during PEI.
1066  if (MO.isFI())
1067  continue;
1068 
1069  assert(MO.isReg() &&
1070  "Operand has register constraints without being a register!");
1071 
1072  unsigned Reg = MO.getReg();
1073  if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
1074  if (!OpRegCstraints->contains(Reg))
1075  return false;
1076  } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1077  !MRI->constrainRegClass(Reg, OpRegCstraints))
1078  return false;
1079  }
1080 
1081  return true;
1082 }
1083 
1084 /// Return the opcode that does not set flags when possible - otherwise
1085 /// return the original opcode. The caller is responsible to do the actual
1086 /// substitution and legality checking.
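/// For example, ADDSWrr becomes ADDWrr. ADDSWri is kept as ADDSWri when the
/// instruction defines WZR, because in the non-flag-setting ADDWri encoding
/// register 31 denotes SP rather than the zero register.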
1087 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
1088  // Don't convert all compare instructions, because for some the zero register
1089  // encoding becomes the sp register.
1090  bool MIDefinesZeroReg = false;
1091  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1092  MIDefinesZeroReg = true;
1093 
1094  switch (MI.getOpcode()) {
1095  default:
1096  return MI.getOpcode();
1097  case AArch64::ADDSWrr:
1098  return AArch64::ADDWrr;
1099  case AArch64::ADDSWri:
1100  return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1101  case AArch64::ADDSWrs:
1102  return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1103  case AArch64::ADDSWrx:
1104  return AArch64::ADDWrx;
1105  case AArch64::ADDSXrr:
1106  return AArch64::ADDXrr;
1107  case AArch64::ADDSXri:
1108  return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1109  case AArch64::ADDSXrs:
1110  return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1111  case AArch64::ADDSXrx:
1112  return AArch64::ADDXrx;
1113  case AArch64::SUBSWrr:
1114  return AArch64::SUBWrr;
1115  case AArch64::SUBSWri:
1116  return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1117  case AArch64::SUBSWrs:
1118  return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1119  case AArch64::SUBSWrx:
1120  return AArch64::SUBWrx;
1121  case AArch64::SUBSXrr:
1122  return AArch64::SUBXrr;
1123  case AArch64::SUBSXri:
1124  return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1125  case AArch64::SUBSXrs:
1126  return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1127  case AArch64::SUBSXrx:
1128  return AArch64::SUBXrx;
1129  }
1130 }
1131 
1132 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
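// The enumerators are bit masks, so AK_All matches both the write (0x01) and
// read (0x10) bits when tested with '&' in areCFlagsAccessedBetweenInstrs().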
1133 
1134 /// True when condition flags are accessed (either by writing or reading)
1135 /// on the instruction trace starting at From and ending at To.
1136 ///
1137 /// Note: If From and To are from different blocks it's assumed CC are accessed
1138 /// on the path.
1139 static bool areCFlagsAccessedBetweenInstrs(
1140  MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1141  const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1142  // Early exit if To is at the beginning of the BB.
1143  if (To == To->getParent()->begin())
1144  return true;
1145 
1146  // Check whether the instructions are in the same basic block
1147  // If not, assume the condition flags might get modified somewhere.
1148  if (To->getParent() != From->getParent())
1149  return true;
1150 
1151  // From must be above To.
1152  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
1153  [From](MachineInstr &MI) {
1154  return MI.getIterator() == From;
1155  }) != To->getParent()->rend());
1156 
1157  // We iterate backward starting \p To until we hit \p From.
1158  for (--To; To != From; --To) {
1159  const MachineInstr &Instr = *To;
1160 
1161  if (((AccessToCheck & AK_Write) &&
1162  Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1163  ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1164  return true;
1165  }
1166  return false;
1167 }
1168 
1169 /// Try to optimize a compare instruction. A compare instruction is an
1170 /// instruction which produces AArch64::NZCV. It is truly a compare
1171 /// instruction when there are no uses of its destination register.
1173 ///
1174 /// The following steps are tried in order:
1175 /// 1. Convert CmpInstr into an unconditional version.
1176 /// 2. Remove CmpInstr if above there is an instruction producing a needed
1177 /// condition code or an instruction which can be converted into such an
1178 /// instruction.
1179 /// Only comparison with zero is supported.
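/// For example:
///   sub w8, w9, w10   ; w8 has no other uses
///   cmp w8, #0
///   b.eq target
/// becomes
///   subs w8, w9, w10
///   b.eq target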
1180 bool AArch64InstrInfo::optimizeCompareInstr(
1181  MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
1182  int CmpValue, const MachineRegisterInfo *MRI) const {
1183  assert(CmpInstr.getParent());
1184  assert(MRI);
1185 
1186  // Replace SUBSWrr with SUBWrr if NZCV is not used.
1187  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1188  if (DeadNZCVIdx != -1) {
1189  if (CmpInstr.definesRegister(AArch64::WZR) ||
1190  CmpInstr.definesRegister(AArch64::XZR)) {
1191  CmpInstr.eraseFromParent();
1192  return true;
1193  }
1194  unsigned Opc = CmpInstr.getOpcode();
1195  unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1196  if (NewOpc == Opc)
1197  return false;
1198  const MCInstrDesc &MCID = get(NewOpc);
1199  CmpInstr.setDesc(MCID);
1200  CmpInstr.RemoveOperand(DeadNZCVIdx);
1201  bool succeeded = UpdateOperandRegClass(CmpInstr);
1202  (void)succeeded;
1203  assert(succeeded && "Some operands reg class are incompatible!");
1204  return true;
1205  }
1206 
1207  // Continue only if we have a "ri" where immediate is zero.
1208  // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
1209  // function.
1210  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
1211  if (CmpValue != 0 || SrcReg2 != 0)
1212  return false;
1213 
1214  // CmpInstr is a Compare instruction if destination register is not used.
1215  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1216  return false;
1217 
1218  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
1219 }
1220 
1221 /// Get opcode of S version of Instr.
1222 /// If Instr is S version its opcode is returned.
1223 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1224 /// or we are not interested in it.
1225 static unsigned sForm(MachineInstr &Instr) {
1226  switch (Instr.getOpcode()) {
1227  default:
1228  return AArch64::INSTRUCTION_LIST_END;
1229 
1230  case AArch64::ADDSWrr:
1231  case AArch64::ADDSWri:
1232  case AArch64::ADDSXrr:
1233  case AArch64::ADDSXri:
1234  case AArch64::SUBSWrr:
1235  case AArch64::SUBSWri:
1236  case AArch64::SUBSXrr:
1237  case AArch64::SUBSXri:
1238  return Instr.getOpcode();
1239 
1240  case AArch64::ADDWrr:
1241  return AArch64::ADDSWrr;
1242  case AArch64::ADDWri:
1243  return AArch64::ADDSWri;
1244  case AArch64::ADDXrr:
1245  return AArch64::ADDSXrr;
1246  case AArch64::ADDXri:
1247  return AArch64::ADDSXri;
1248  case AArch64::ADCWr:
1249  return AArch64::ADCSWr;
1250  case AArch64::ADCXr:
1251  return AArch64::ADCSXr;
1252  case AArch64::SUBWrr:
1253  return AArch64::SUBSWrr;
1254  case AArch64::SUBWri:
1255  return AArch64::SUBSWri;
1256  case AArch64::SUBXrr:
1257  return AArch64::SUBSXrr;
1258  case AArch64::SUBXri:
1259  return AArch64::SUBSXri;
1260  case AArch64::SBCWr:
1261  return AArch64::SBCSWr;
1262  case AArch64::SBCXr:
1263  return AArch64::SBCSXr;
1264  case AArch64::ANDWri:
1265  return AArch64::ANDSWri;
1266  case AArch64::ANDXri:
1267  return AArch64::ANDSXri;
1268  }
1269 }
1270 
1271 /// Check if AArch64::NZCV should be alive in successors of MBB.
1272 static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
1273  for (auto *BB : MBB->successors())
1274  if (BB->isLiveIn(AArch64::NZCV))
1275  return true;
1276  return false;
1277 }
1278 
1279 namespace {
1280 
1281 struct UsedNZCV {
1282  bool N = false;
1283  bool Z = false;
1284  bool C = false;
1285  bool V = false;
1286 
1287  UsedNZCV() = default;
1288 
1289  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
1290  this->N |= UsedFlags.N;
1291  this->Z |= UsedFlags.Z;
1292  this->C |= UsedFlags.C;
1293  this->V |= UsedFlags.V;
1294  return *this;
1295  }
1296 };
1297 
1298 } // end anonymous namespace
1299 
1300 /// Find a condition code used by the instruction.
1301 /// Returns AArch64CC::Invalid if either the instruction does not use condition
1302 /// codes or we don't optimize CmpInstr in the presence of such instructions.
1303 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1304  switch (Instr.getOpcode()) {
1305  default:
1306  return AArch64CC::Invalid;
1307 
1308  case AArch64::Bcc: {
1309  int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1310  assert(Idx >= 2);
1311  return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
1312  }
1313 
1314  case AArch64::CSINVWr:
1315  case AArch64::CSINVXr:
1316  case AArch64::CSINCWr:
1317  case AArch64::CSINCXr:
1318  case AArch64::CSELWr:
1319  case AArch64::CSELXr:
1320  case AArch64::CSNEGWr:
1321  case AArch64::CSNEGXr:
1322  case AArch64::FCSELSrrr:
1323  case AArch64::FCSELDrrr: {
1324  int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1325  assert(Idx >= 1);
1326  return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1327  }
1328  }
1329 }
1330 
1331 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1332  assert(CC != AArch64CC::Invalid);
1333  UsedNZCV UsedFlags;
1334  switch (CC) {
1335  default:
1336  break;
1337 
1338  case AArch64CC::EQ: // Z set
1339  case AArch64CC::NE: // Z clear
1340  UsedFlags.Z = true;
1341  break;
1342 
1343  case AArch64CC::HI: // Z clear and C set
1344  case AArch64CC::LS: // Z set or C clear
1345  UsedFlags.Z = true;
1346  LLVM_FALLTHROUGH;
1347  case AArch64CC::HS: // C set
1348  case AArch64CC::LO: // C clear
1349  UsedFlags.C = true;
1350  break;
1351 
1352  case AArch64CC::MI: // N set
1353  case AArch64CC::PL: // N clear
1354  UsedFlags.N = true;
1355  break;
1356 
1357  case AArch64CC::VS: // V set
1358  case AArch64CC::VC: // V clear
1359  UsedFlags.V = true;
1360  break;
1361 
1362  case AArch64CC::GT: // Z clear, N and V the same
1363  case AArch64CC::LE: // Z set, N and V differ
1364  UsedFlags.Z = true;
1365  LLVM_FALLTHROUGH;
1366  case AArch64CC::GE: // N and V the same
1367  case AArch64CC::LT: // N and V differ
1368  UsedFlags.N = true;
1369  UsedFlags.V = true;
1370  break;
1371  }
1372  return UsedFlags;
1373 }
1374 
1375 static bool isADDSRegImm(unsigned Opcode) {
1376  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1377 }
1378 
1379 static bool isSUBSRegImm(unsigned Opcode) {
1380  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1381 }
1382 
1383 /// Check if CmpInstr can be substituted by MI.
1384 ///
1385 /// CmpInstr can be substituted:
1386 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1387 /// - and, MI and CmpInstr are from the same MachineBB
1388 /// - and, condition flags are not alive in successors of the CmpInstr parent
1389 /// - and, if MI opcode is the S form there must be no defs of flags between
1390 /// MI and CmpInstr
1391 /// or if MI opcode is not the S form there must be neither defs of flags
1392 /// nor uses of flags between MI and CmpInstr.
1393 /// - and C/V flags are not used after CmpInstr
1394 static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
1395  const TargetRegisterInfo *TRI) {
1396  assert(MI);
1397  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1398  assert(CmpInstr);
1399 
1400  const unsigned CmpOpcode = CmpInstr->getOpcode();
1401  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1402  return false;
1403 
1404  if (MI->getParent() != CmpInstr->getParent())
1405  return false;
1406 
1407  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1408  return false;
1409 
1410  AccessKind AccessToCheck = AK_Write;
1411  if (sForm(*MI) != MI->getOpcode())
1412  AccessToCheck = AK_All;
1413  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1414  return false;
1415 
1416  UsedNZCV NZCVUsedAfterCmp;
1417  for (auto I = std::next(CmpInstr->getIterator()),
1418  E = CmpInstr->getParent()->instr_end();
1419  I != E; ++I) {
1420  const MachineInstr &Instr = *I;
1421  if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1422  AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1423  if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1424  return false;
1425  NZCVUsedAfterCmp |= getUsedNZCV(CC);
1426  }
1427 
1428  if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1429  break;
1430  }
1431 
1432  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1433 }
1434 
1435 /// Substitute an instruction comparing to zero with another instruction
1436 /// which produces needed condition flags.
1437 ///
1438 /// Return true on success.
1439 bool AArch64InstrInfo::substituteCmpToZero(
1440  MachineInstr &CmpInstr, unsigned SrcReg,
1441  const MachineRegisterInfo *MRI) const {
1442  assert(MRI);
1443  // Get the unique definition of SrcReg.
1444  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1445  if (!MI)
1446  return false;
1447 
1448  const TargetRegisterInfo *TRI = &getRegisterInfo();
1449 
1450  unsigned NewOpc = sForm(*MI);
1451  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1452  return false;
1453 
1454  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
1455  return false;
1456 
1457  // Update the instruction to set NZCV.
1458  MI->setDesc(get(NewOpc));
1459  CmpInstr.eraseFromParent();
1460  bool succeeded = UpdateOperandRegClass(*MI);
1461  (void)succeeded;
1462  assert(succeeded && "Some operands reg class are incompatible!");
1463  MI->addRegisterDefined(AArch64::NZCV, TRI);
1464  return true;
1465 }
1466 
1467 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1468  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1469  MI.getOpcode() != AArch64::CATCHRET)
1470  return false;
1471 
1472  MachineBasicBlock &MBB = *MI.getParent();
1473  DebugLoc DL = MI.getDebugLoc();
1474 
1475  if (MI.getOpcode() == AArch64::CATCHRET) {
1476  // Skip to the first instruction before the epilog.
1477  const TargetInstrInfo *TII =
1478  MBB.getParent()->getSubtarget().getInstrInfo();
1479  MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1480  auto MBBI = MachineBasicBlock::iterator(MI);
1481  MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1482  while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1483  FirstEpilogSEH != MBB.begin())
1484  FirstEpilogSEH = std::prev(FirstEpilogSEH);
1485  if (FirstEpilogSEH != MBB.begin())
1486  FirstEpilogSEH = std::next(FirstEpilogSEH);
1487  BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1488  .addReg(AArch64::X0, RegState::Define)
1489  .addMBB(TargetMBB);
1490  BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1491  .addReg(AArch64::X0, RegState::Define)
1492  .addReg(AArch64::X0)
1493  .addMBB(TargetMBB)
1494  .addImm(0);
1495  return true;
1496  }
1497 
1498  unsigned Reg = MI.getOperand(0).getReg();
1499  const GlobalValue *GV =
1500  cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1501  const TargetMachine &TM = MBB.getParent()->getTarget();
1502  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1503  const unsigned char MO_NC = AArch64II::MO_NC;
1504 
1505  if ((OpFlags & AArch64II::MO_GOT) != 0) {
1506  BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1507  .addGlobalAddress(GV, 0, OpFlags);
1508  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1509  .addReg(Reg, RegState::Kill)
1510  .addImm(0)
1511  .addMemOperand(*MI.memoperands_begin());
1512  } else if (TM.getCodeModel() == CodeModel::Large) {
1513  BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1514  .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
1515  .addImm(0);
1516  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1517  .addReg(Reg, RegState::Kill)
1518  .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
1519  .addImm(16);
1520  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1521  .addReg(Reg, RegState::Kill)
1522  .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
1523  .addImm(32);
1524  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1525  .addReg(Reg, RegState::Kill)
1526  .addGlobalAddress(GV, 0, AArch64II::MO_G3)
1527  .addImm(48);
1528  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1529  .addReg(Reg, RegState::Kill)
1530  .addImm(0)
1531  .addMemOperand(*MI.memoperands_begin());
1532  } else if (TM.getCodeModel() == CodeModel::Tiny) {
1533  BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
1534  .addGlobalAddress(GV, 0, OpFlags);
1535  } else {
1536  BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1537  .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1538  unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1539  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1540  .addReg(Reg, RegState::Kill)
1541  .addGlobalAddress(GV, 0, LoFlags)
1542  .addMemOperand(*MI.memoperands_begin());
1543  }
1544 
1545  MBB.erase(MI);
1546 
1547  return true;
1548 }
1549 
1550 // Return true if this instruction simply sets its single destination register
1551 // to zero. This is equivalent to a register rename of the zero-register.
1552 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
1553  switch (MI.getOpcode()) {
1554  default:
1555  break;
1556  case AArch64::MOVZWi:
1557  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1558  if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1559  assert(MI.getDesc().getNumOperands() == 3 &&
1560  MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1561  return true;
1562  }
1563  break;
1564  case AArch64::ANDWri: // and Rd, Rzr, #imm
1565  return MI.getOperand(1).getReg() == AArch64::WZR;
1566  case AArch64::ANDXri:
1567  return MI.getOperand(1).getReg() == AArch64::XZR;
1568  case TargetOpcode::COPY:
1569  return MI.getOperand(1).getReg() == AArch64::WZR;
1570  }
1571  return false;
1572 }
1573 
1574 // Return true if this instruction simply renames a general register without
1575 // modifying bits.
1576 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
1577  switch (MI.getOpcode()) {
1578  default:
1579  break;
1580  case TargetOpcode::COPY: {
1581  // GPR32 copies will be lowered to ORRXrs
1582  unsigned DstReg = MI.getOperand(0).getReg();
1583  return (AArch64::GPR32RegClass.contains(DstReg) ||
1584  AArch64::GPR64RegClass.contains(DstReg));
1585  }
1586  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1587  if (MI.getOperand(1).getReg() == AArch64::XZR) {
1588  assert(MI.getDesc().getNumOperands() == 4 &&
1589  MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1590  return true;
1591  }
1592  break;
1593  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1594  if (MI.getOperand(2).getImm() == 0) {
1595  assert(MI.getDesc().getNumOperands() == 4 &&
1596  MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1597  return true;
1598  }
1599  break;
1600  }
1601  return false;
1602 }
1603 
1604 // Return true if this instruction simply renames a floating-point register
1605 // without modifying bits.
1606 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
1607  switch (MI.getOpcode()) {
1608  default:
1609  break;
1610  case TargetOpcode::COPY: {
1611  // FPR64 copies will be lowered to ORR.16b
1612  unsigned DstReg = MI.getOperand(0).getReg();
1613  return (AArch64::FPR64RegClass.contains(DstReg) ||
1614  AArch64::FPR128RegClass.contains(DstReg));
1615  }
1616  case AArch64::ORRv16i8:
1617  if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1618  assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1619  "invalid ORRv16i8 operands");
1620  return true;
1621  }
1622  break;
1623  }
1624  return false;
1625 }
1626 
1627 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1628  int &FrameIndex) const {
1629  switch (MI.getOpcode()) {
1630  default:
1631  break;
1632  case AArch64::LDRWui:
1633  case AArch64::LDRXui:
1634  case AArch64::LDRBui:
1635  case AArch64::LDRHui:
1636  case AArch64::LDRSui:
1637  case AArch64::LDRDui:
1638  case AArch64::LDRQui:
1639  if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1640  MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1641  FrameIndex = MI.getOperand(1).getIndex();
1642  return MI.getOperand(0).getReg();
1643  }
1644  break;
1645  }
1646 
1647  return 0;
1648 }
1649 
1650 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1651  int &FrameIndex) const {
1652  switch (MI.getOpcode()) {
1653  default:
1654  break;
1655  case AArch64::STRWui:
1656  case AArch64::STRXui:
1657  case AArch64::STRBui:
1658  case AArch64::STRHui:
1659  case AArch64::STRSui:
1660  case AArch64::STRDui:
1661  case AArch64::STRQui:
1662  if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1663  MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1664  FrameIndex = MI.getOperand(1).getIndex();
1665  return MI.getOperand(0).getReg();
1666  }
1667  break;
1668  }
1669  return 0;
1670 }
1671 
1672 /// Check all MachineMemOperands for a hint to suppress pairing.
1673 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
1674  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1675  return MMO->getFlags() & MOSuppressPair;
1676  });
1677 }
1678 
1679 /// Set a flag on the first MachineMemOperand to suppress pairing.
1680 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
1681  if (MI.memoperands_empty())
1682  return;
1683  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1684 }
1685 
1686 /// Check all MachineMemOperands for a hint that the load/store is strided.
1687 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
1688  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1689  return MMO->getFlags() & MOStridedAccess;
1690  });
1691 }
1692 
1693 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
1694  switch (Opc) {
1695  default:
1696  return false;
1697  case AArch64::STURSi:
1698  case AArch64::STURDi:
1699  case AArch64::STURQi:
1700  case AArch64::STURBBi:
1701  case AArch64::STURHHi:
1702  case AArch64::STURWi:
1703  case AArch64::STURXi:
1704  case AArch64::LDURSi:
1705  case AArch64::LDURDi:
1706  case AArch64::LDURQi:
1707  case AArch64::LDURWi:
1708  case AArch64::LDURXi:
1709  case AArch64::LDURSWi:
1710  case AArch64::LDURHHi:
1711  case AArch64::LDURBBi:
1712  case AArch64::LDURSBWi:
1713  case AArch64::LDURSHWi:
1714  return true;
1715  }
1716 }
1717 
1718 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
1719  switch (MI.getOpcode()) {
1720  default:
1721  return false;
1722  // Scaled instructions.
1723  case AArch64::STRSui:
1724  case AArch64::STRDui:
1725  case AArch64::STRQui:
1726  case AArch64::STRXui:
1727  case AArch64::STRWui:
1728  case AArch64::LDRSui:
1729  case AArch64::LDRDui:
1730  case AArch64::LDRQui:
1731  case AArch64::LDRXui:
1732  case AArch64::LDRWui:
1733  case AArch64::LDRSWui:
1734  // Unscaled instructions.
1735  case AArch64::STURSi:
1736  case AArch64::STURDi:
1737  case AArch64::STURQi:
1738  case AArch64::STURWi:
1739  case AArch64::STURXi:
1740  case AArch64::LDURSi:
1741  case AArch64::LDURDi:
1742  case AArch64::LDURQi:
1743  case AArch64::LDURWi:
1744  case AArch64::LDURXi:
1745  case AArch64::LDURSWi:
1746  return true;
1747  }
1748 }
1749 
1750 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
1751  bool &Is64Bit) {
1752  switch (Opc) {
1753  default:
1754  llvm_unreachable("Opcode has no flag setting equivalent!");
1755  // 32-bit cases:
1756  case AArch64::ADDWri:
1757  Is64Bit = false;
1758  return AArch64::ADDSWri;
1759  case AArch64::ADDWrr:
1760  Is64Bit = false;
1761  return AArch64::ADDSWrr;
1762  case AArch64::ADDWrs:
1763  Is64Bit = false;
1764  return AArch64::ADDSWrs;
1765  case AArch64::ADDWrx:
1766  Is64Bit = false;
1767  return AArch64::ADDSWrx;
1768  case AArch64::ANDWri:
1769  Is64Bit = false;
1770  return AArch64::ANDSWri;
1771  case AArch64::ANDWrr:
1772  Is64Bit = false;
1773  return AArch64::ANDSWrr;
1774  case AArch64::ANDWrs:
1775  Is64Bit = false;
1776  return AArch64::ANDSWrs;
1777  case AArch64::BICWrr:
1778  Is64Bit = false;
1779  return AArch64::BICSWrr;
1780  case AArch64::BICWrs:
1781  Is64Bit = false;
1782  return AArch64::BICSWrs;
1783  case AArch64::SUBWri:
1784  Is64Bit = false;
1785  return AArch64::SUBSWri;
1786  case AArch64::SUBWrr:
1787  Is64Bit = false;
1788  return AArch64::SUBSWrr;
1789  case AArch64::SUBWrs:
1790  Is64Bit = false;
1791  return AArch64::SUBSWrs;
1792  case AArch64::SUBWrx:
1793  Is64Bit = false;
1794  return AArch64::SUBSWrx;
1795  // 64-bit cases:
1796  case AArch64::ADDXri:
1797  Is64Bit = true;
1798  return AArch64::ADDSXri;
1799  case AArch64::ADDXrr:
1800  Is64Bit = true;
1801  return AArch64::ADDSXrr;
1802  case AArch64::ADDXrs:
1803  Is64Bit = true;
1804  return AArch64::ADDSXrs;
1805  case AArch64::ADDXrx:
1806  Is64Bit = true;
1807  return AArch64::ADDSXrx;
1808  case AArch64::ANDXri:
1809  Is64Bit = true;
1810  return AArch64::ANDSXri;
1811  case AArch64::ANDXrr:
1812  Is64Bit = true;
1813  return AArch64::ANDSXrr;
1814  case AArch64::ANDXrs:
1815  Is64Bit = true;
1816  return AArch64::ANDSXrs;
1817  case AArch64::BICXrr:
1818  Is64Bit = true;
1819  return AArch64::BICSXrr;
1820  case AArch64::BICXrs:
1821  Is64Bit = true;
1822  return AArch64::BICSXrs;
1823  case AArch64::SUBXri:
1824  Is64Bit = true;
1825  return AArch64::SUBSXri;
1826  case AArch64::SUBXrr:
1827  Is64Bit = true;
1828  return AArch64::SUBSXrr;
1829  case AArch64::SUBXrs:
1830  Is64Bit = true;
1831  return AArch64::SUBSXrs;
1832  case AArch64::SUBXrx:
1833  Is64Bit = true;
1834  return AArch64::SUBSXrx;
1835  }
1836 }
1837 
1838 // Is this a candidate for ld/st merging or pairing? For example, we don't
1839 // touch volatiles or load/stores that have a hint to avoid pair formation.
1840 bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
1841  // If this is a volatile load/store, don't mess with it.
1842  if (MI.hasOrderedMemoryRef())
1843  return false;
1844 
1845  // Make sure this is a reg/fi+imm (as opposed to an address reloc).
1846  assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
1847  "Expected a reg or frame index operand.");
1848  if (!MI.getOperand(2).isImm())
1849  return false;
1850 
1851  // Can't merge/pair if the instruction modifies the base register.
1852  // e.g., ldr x0, [x0]
1853  // This case will never occur with an FI base.
1854  if (MI.getOperand(1).isReg()) {
1855  unsigned BaseReg = MI.getOperand(1).getReg();
1856  const TargetRegisterInfo *TRI = &getRegisterInfo();
1857  if (MI.modifiesRegister(BaseReg, TRI))
1858  return false;
1859  }
1860 
1861  // Check if this load/store has a hint to avoid pair formation.
1862  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
1863  if (isLdStPairSuppressed(MI))
1864  return false;
1865 
1866  // On some CPUs quad load/store pairs are slower than two single load/stores.
1867  if (Subtarget.isPaired128Slow()) {
1868  switch (MI.getOpcode()) {
1869  default:
1870  break;
1871  case AArch64::LDURQi:
1872  case AArch64::STURQi:
1873  case AArch64::LDRQui:
1874  case AArch64::STRQui:
1875  return false;
1876  }
1877  }
1878 
1879  return true;
1880 }
1881 
1882 bool AArch64InstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
1883  MachineOperand *&BaseOp,
1884  int64_t &Offset,
1885  const TargetRegisterInfo *TRI) const {
1886  unsigned Width;
1887  return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
1888 }
1889 
1890 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
1891  MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
1892  unsigned &Width, const TargetRegisterInfo *TRI) const {
1893  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
1894  // Handle only loads/stores with base register followed by immediate offset.
1895  if (LdSt.getNumExplicitOperands() == 3) {
1896  // Non-paired instruction (e.g., ldr x1, [x0, #8]).
1897  if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
1898  !LdSt.getOperand(2).isImm())
1899  return false;
1900  } else if (LdSt.getNumExplicitOperands() == 4) {
1901  // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
1902  if (!LdSt.getOperand(1).isReg() ||
1903  (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
1904  !LdSt.getOperand(3).isImm())
1905  return false;
1906  } else
1907  return false;
1908 
1909  // Get the scaling factor for the instruction and set the width for the
1910  // instruction.
1911  unsigned Scale = 0;
1912  int64_t Dummy1, Dummy2;
1913 
1914  // If this returns false, then it's an instruction we don't want to handle.
1915  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
1916  return false;
1917 
1918  // Compute the offset. Offset is calculated as the immediate operand
1919  // multiplied by the scaling factor. Unscaled instructions have scaling factor
1920  // set to 1.
1921  if (LdSt.getNumExplicitOperands() == 3) {
1922  BaseOp = &LdSt.getOperand(1);
1923  Offset = LdSt.getOperand(2).getImm() * Scale;
1924  } else {
1925  assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
1926  BaseOp = &LdSt.getOperand(2);
1927  Offset = LdSt.getOperand(3).getImm() * Scale;
1928  }
1929 
1930  assert((BaseOp->isReg() || BaseOp->isFI()) &&
1931  "getMemOperandWithOffset only supports base "
1932  "operands of type register or frame index.");
1933 
1934  return true;
1935 }
1936 
1939  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
1940  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
1941  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
1942  return OfsOp;
1943 }
1944 
1945 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
1946  unsigned &Width, int64_t &MinOffset,
1947  int64_t &MaxOffset) const {
1948  switch (Opcode) {
1949  // Not a memory operation or something we want to handle.
1950  default:
1951  Scale = Width = 0;
1952  MinOffset = MaxOffset = 0;
1953  return false;
1954  case AArch64::STRWpost:
1955  case AArch64::LDRWpost:
1956  Width = 32;
1957  Scale = 4;
1958  MinOffset = -256;
1959  MaxOffset = 255;
1960  break;
1961  case AArch64::LDURQi:
1962  case AArch64::STURQi:
1963  Width = 16;
1964  Scale = 1;
1965  MinOffset = -256;
1966  MaxOffset = 255;
1967  break;
1968  case AArch64::LDURXi:
1969  case AArch64::LDURDi:
1970  case AArch64::STURXi:
1971  case AArch64::STURDi:
1972  Width = 8;
1973  Scale = 1;
1974  MinOffset = -256;
1975  MaxOffset = 255;
1976  break;
1977  case AArch64::LDURWi:
1978  case AArch64::LDURSi:
1979  case AArch64::LDURSWi:
1980  case AArch64::STURWi:
1981  case AArch64::STURSi:
1982  Width = 4;
1983  Scale = 1;
1984  MinOffset = -256;
1985  MaxOffset = 255;
1986  break;
1987  case AArch64::LDURHi:
1988  case AArch64::LDURHHi:
1989  case AArch64::LDURSHXi:
1990  case AArch64::LDURSHWi:
1991  case AArch64::STURHi:
1992  case AArch64::STURHHi:
1993  Width = 2;
1994  Scale = 1;
1995  MinOffset = -256;
1996  MaxOffset = 255;
1997  break;
1998  case AArch64::LDURBi:
1999  case AArch64::LDURBBi:
2000  case AArch64::LDURSBXi:
2001  case AArch64::LDURSBWi:
2002  case AArch64::STURBi:
2003  case AArch64::STURBBi:
2004  Width = 1;
2005  Scale = 1;
2006  MinOffset = -256;
2007  MaxOffset = 255;
2008  break;
2009  case AArch64::LDPQi:
2010  case AArch64::LDNPQi:
2011  case AArch64::STPQi:
2012  case AArch64::STNPQi:
2013  Scale = 16;
2014  Width = 32;
2015  MinOffset = -64;
2016  MaxOffset = 63;
2017  break;
2018  case AArch64::LDRQui:
2019  case AArch64::STRQui:
2020  Scale = Width = 16;
2021  MinOffset = 0;
2022  MaxOffset = 4095;
2023  break;
2024  case AArch64::LDPXi:
2025  case AArch64::LDPDi:
2026  case AArch64::LDNPXi:
2027  case AArch64::LDNPDi:
2028  case AArch64::STPXi:
2029  case AArch64::STPDi:
2030  case AArch64::STNPXi:
2031  case AArch64::STNPDi:
2032  Scale = 8;
2033  Width = 16;
2034  MinOffset = -64;
2035  MaxOffset = 63;
2036  break;
2037  case AArch64::LDRXui:
2038  case AArch64::LDRDui:
2039  case AArch64::STRXui:
2040  case AArch64::STRDui:
2041  Scale = Width = 8;
2042  MinOffset = 0;
2043  MaxOffset = 4095;
2044  break;
2045  case AArch64::LDPWi:
2046  case AArch64::LDPSi:
2047  case AArch64::LDNPWi:
2048  case AArch64::LDNPSi:
2049  case AArch64::STPWi:
2050  case AArch64::STPSi:
2051  case AArch64::STNPWi:
2052  case AArch64::STNPSi:
2053  Scale = 4;
2054  Width = 8;
2055  MinOffset = -64;
2056  MaxOffset = 63;
2057  break;
2058  case AArch64::LDRWui:
2059  case AArch64::LDRSui:
2060  case AArch64::LDRSWui:
2061  case AArch64::STRWui:
2062  case AArch64::STRSui:
2063  Scale = Width = 4;
2064  MinOffset = 0;
2065  MaxOffset = 4095;
2066  break;
2067  case AArch64::LDRHui:
2068  case AArch64::LDRHHui:
2069  case AArch64::STRHui:
2070  case AArch64::STRHHui:
2071  Scale = Width = 2;
2072  MinOffset = 0;
2073  MaxOffset = 4095;
2074  break;
2075  case AArch64::LDRBui:
2076  case AArch64::LDRBBui:
2077  case AArch64::STRBui:
2078  case AArch64::STRBBui:
2079  Scale = Width = 1;
2080  MinOffset = 0;
2081  MaxOffset = 4095;
2082  break;
2083  }
2084 
2085  return true;
2086 }
2087 
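// A minimal usage sketch (assumed caller, not part of this file): a pass
// holding an AArch64InstrInfo reference TII can query the table above.  For
// the scaled form AArch64::LDRXui it reports Scale = Width = 8 with an
// immediate range of [0, 4095] elements, i.e. byte offsets 0..32760:
//
//   unsigned Scale, Width;
//   int64_t MinOff, MaxOff;
//   if (TII.getMemOpInfo(AArch64::LDRXui, Scale, Width, MinOff, MaxOff))
//     assert(Scale == 8 && Width == 8 && MinOff == 0 && MaxOff == 4095);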
2088 static unsigned getOffsetStride(unsigned Opc) {
2089  switch (Opc) {
2090  default:
2091  return 0;
2092  case AArch64::LDURQi:
2093  case AArch64::STURQi:
2094  return 16;
2095  case AArch64::LDURXi:
2096  case AArch64::LDURDi:
2097  case AArch64::STURXi:
2098  case AArch64::STURDi:
2099  return 8;
2100  case AArch64::LDURWi:
2101  case AArch64::LDURSi:
2102  case AArch64::LDURSWi:
2103  case AArch64::STURWi:
2104  case AArch64::STURSi:
2105  return 4;
2106  }
2107 }
2108 
2109 // Scale the unscaled offsets. Returns false if the unscaled offset can't be
2110 // scaled.
2111 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2112  unsigned OffsetStride = getOffsetStride(Opc);
2113  if (OffsetStride == 0)
2114  return false;
2115  // If the byte-offset isn't a multiple of the stride, we can't scale this
2116  // offset.
2117  if (Offset % OffsetStride != 0)
2118  return false;
2119 
2120  // Convert the byte-offset used by unscaled into an "element" offset used
2121  // by the scaled pair load/store instructions.
2122  Offset /= OffsetStride;
2123  return true;
2124 }
2125 
2126 // Unscale the scaled offsets. Returns false if the scaled offset can't be
2127 // unscaled.
2128 static bool unscaleOffset(unsigned Opc, int64_t &Offset) {
2129  unsigned OffsetStride = getOffsetStride(Opc);
2130  if (OffsetStride == 0)
2131  return false;
2132 
2133  // Convert the "element" offset used by scaled pair load/store instructions
2134  // into the byte-offset used by unscaled.
2135  Offset *= OffsetStride;
2136  return true;
2137 }
2138 
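// For example (illustrative values): with Opc = AArch64::LDURXi the stride is
// 8, so scaleOffset turns the byte offset 24 into the element offset 3, while
// a byte offset of 20 is rejected because it is not a multiple of the stride:
//
//   int64_t Off = 24;
//   bool OK = scaleOffset(AArch64::LDURXi, Off);  // OK == true,  Off == 3
//   Off = 20;
//   OK = scaleOffset(AArch64::LDURXi, Off);       // OK == false, Off == 20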
2139 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2140  if (FirstOpc == SecondOpc)
2141  return true;
2142  // We can also pair sign-ext and zero-ext instructions.
2143  switch (FirstOpc) {
2144  default:
2145  return false;
2146  case AArch64::LDRWui:
2147  case AArch64::LDURWi:
2148  return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2149  case AArch64::LDRSWui:
2150  case AArch64::LDURSWi:
2151  return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2152  }
2153  // These instructions can't be paired based on their opcodes.
2154  return false;
2155 }
2156 
2157 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
2158  int64_t Offset1, unsigned Opcode1, int FI2,
2159  int64_t Offset2, unsigned Opcode2) {
2160  // Accesses through fixed stack object frame indices may access a different
2161  // fixed stack slot. Check that the object offsets + offsets match.
2162  if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
2163  int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
2164  int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
2165  assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
2166  // Get the byte-offset from the object offset.
2167  if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2))
2168  return false;
2169  ObjectOffset1 += Offset1;
2170  ObjectOffset2 += Offset2;
2171  // Get the "element" index in the object.
2172  if (!scaleOffset(Opcode1, ObjectOffset1) ||
2173  !scaleOffset(Opcode2, ObjectOffset2))
2174  return false;
2175  return ObjectOffset1 + 1 == ObjectOffset2;
2176  }
2177 
2178  return FI1 == FI2;
2179 }
2180 
2181 /// Detect opportunities for ldp/stp formation.
2182 ///
2183 /// Only called for LdSt for which getMemOperandWithOffset returns true.
2184 bool AArch64InstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
2185  MachineOperand &BaseOp2,
2186  unsigned NumLoads) const {
2187  MachineInstr &FirstLdSt = *BaseOp1.getParent();
2188  MachineInstr &SecondLdSt = *BaseOp2.getParent();
2189  if (BaseOp1.getType() != BaseOp2.getType())
2190  return false;
2191 
2192  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2193  "Only base registers and frame indices are supported.");
2194 
2195  // Check for both base regs and base FI.
2196  if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
2197  return false;
2198 
2199  // Only cluster up to a single pair.
2200  if (NumLoads > 1)
2201  return false;
2202 
2203  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2204  return false;
2205 
2206  // Can we pair these instructions based on their opcodes?
2207  unsigned FirstOpc = FirstLdSt.getOpcode();
2208  unsigned SecondOpc = SecondLdSt.getOpcode();
2209  if (!canPairLdStOpc(FirstOpc, SecondOpc))
2210  return false;
2211 
2212  // Can't merge volatiles or load/stores that have a hint to avoid pair
2213  // formation, for example.
2214  if (!isCandidateToMergeOrPair(FirstLdSt) ||
2215  !isCandidateToMergeOrPair(SecondLdSt))
2216  return false;
2217 
2218  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2219  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2220  if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2221  return false;
2222 
2223  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2224  if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2225  return false;
2226 
2227  // Pairwise instructions have a 7-bit signed offset field.
2228  if (Offset1 > 63 || Offset1 < -64)
2229  return false;
2230 
2231  // The caller should already have ordered First/SecondLdSt by offset.
2232  // Note: except for non-equal frame index bases
2233  if (BaseOp1.isFI()) {
2234  assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 >= Offset2) &&
2235  "Caller should have ordered offsets.");
2236 
2237  const MachineFrameInfo &MFI =
2238  FirstLdSt.getParent()->getParent()->getFrameInfo();
2239  return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
2240  BaseOp2.getIndex(), Offset2, SecondOpc);
2241  }
2242 
2243  assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
2244  "Caller should have ordered offsets.");
2245 
2246  return Offset1 + 1 == Offset2;
2247 }
2248 
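// A sketch of the case the check above accepts (assumed input, for
// illustration only): two 8-byte loads from the same base register at
// consecutive element offsets, which the load/store optimizer can later
// rewrite into a single ldp:
//
//   ldr x1, [x0]        // Offset1 == 0
//   ldr x2, [x0, #8]    // Offset2 == 1 (one 8-byte element)
//   // -> ldp x1, x2, [x0]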
2249 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2250  unsigned Reg, unsigned SubIdx,
2251  unsigned State,
2252  const TargetRegisterInfo *TRI) {
2253  if (!SubIdx)
2254  return MIB.addReg(Reg, State);
2255 
2256  if (TargetRegisterInfo::isPhysicalRegister(Reg))
2257  return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2258  return MIB.addReg(Reg, State, SubIdx);
2259 }
2260 
2261 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2262  unsigned NumRegs) {
2263  // We really want the positive remainder mod 32 here, that happens to be
2264  // easily obtainable with a mask.
2265  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2266 }
2267 
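// Worked example (illustrative): copying the D-register triple D1_D2_D3 into
// D2_D3_D4 gives (DestEncoding - SrcEncoding) & 0x1f == 1, which is less than
// NumRegs == 3, so a front-to-back sub-register copy would overwrite D2/D3
// before they are read; copyPhysRegTuple below copies in reverse order in
// that case.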
2268 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2269  MachineBasicBlock::iterator I,
2270  const DebugLoc &DL, unsigned DestReg,
2271  unsigned SrcReg, bool KillSrc,
2272  unsigned Opcode,
2273  ArrayRef<unsigned> Indices) const {
2274  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2275  const TargetRegisterInfo *TRI = &getRegisterInfo();
2276  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2277  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2278  unsigned NumRegs = Indices.size();
2279 
2280  int SubReg = 0, End = NumRegs, Incr = 1;
2281  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2282  SubReg = NumRegs - 1;
2283  End = -1;
2284  Incr = -1;
2285  }
2286 
2287  for (; SubReg != End; SubReg += Incr) {
2288  const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2289  AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2290  AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2291  AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2292  }
2293 }
2294 
2295 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
2296  MachineBasicBlock::iterator I,
2297  DebugLoc DL, unsigned DestReg,
2298  unsigned SrcReg, bool KillSrc,
2299  unsigned Opcode, unsigned ZeroReg,
2300  llvm::ArrayRef<unsigned> Indices) const {
2301  const TargetRegisterInfo *TRI = &getRegisterInfo();
2302  unsigned NumRegs = Indices.size();
2303 
2304 #ifndef NDEBUG
2305  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2306  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2307  assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
2308  "GPR reg sequences should not be able to overlap");
2309 #endif
2310 
2311  for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
2312  const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2313  AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2314  MIB.addReg(ZeroReg);
2315  AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2316  MIB.addImm(0);
2317  }
2318 }
2319 
2320 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2321  MachineBasicBlock::iterator I,
2322  const DebugLoc &DL, unsigned DestReg,
2323  unsigned SrcReg, bool KillSrc) const {
2324  if (AArch64::GPR32spRegClass.contains(DestReg) &&
2325  (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2326  const TargetRegisterInfo *TRI = &getRegisterInfo();
2327 
2328  if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2329  // If either operand is WSP, expand to ADD #0.
2330  if (Subtarget.hasZeroCycleRegMove()) {
2331  // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2332  unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2333  &AArch64::GPR64spRegClass);
2334  unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2335  &AArch64::GPR64spRegClass);
2336  // This instruction is reading and writing X registers. This may upset
2337  // the register scavenger and machine verifier, so we need to indicate
2338  // that we are reading an undefined value from SrcRegX, but a proper
2339  // value from SrcReg.
2340  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2341  .addReg(SrcRegX, RegState::Undef)
2342  .addImm(0)
2343  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2344  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2345  } else {
2346  BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2347  .addReg(SrcReg, getKillRegState(KillSrc))
2348  .addImm(0)
2349  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2350  }
2351  } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
2352  BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2353  .addImm(0)
2354  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2355  } else {
2356  if (Subtarget.hasZeroCycleRegMove()) {
2357  // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2358  unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2359  &AArch64::GPR64spRegClass);
2360  unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2361  &AArch64::GPR64spRegClass);
2362  // This instruction is reading and writing X registers. This may upset
2363  // the register scavenger and machine verifier, so we need to indicate
2364  // that we are reading an undefined value from SrcRegX, but a proper
2365  // value from SrcReg.
2366  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2367  .addReg(AArch64::XZR)
2368  .addReg(SrcRegX, RegState::Undef)
2369  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2370  } else {
2371  // Otherwise, expand to ORR WZR.
2372  BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2373  .addReg(AArch64::WZR)
2374  .addReg(SrcReg, getKillRegState(KillSrc));
2375  }
2376  }
2377  return;
2378  }
2379 
2380  if (AArch64::GPR64spRegClass.contains(DestReg) &&
2381  (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2382  if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2383  // If either operand is SP, expand to ADD #0.
2384  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2385  .addReg(SrcReg, getKillRegState(KillSrc))
2386  .addImm(0)
2387  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2388  } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
2389  BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2390  .addImm(0)
2391  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2392  } else {
2393  // Otherwise, expand to ORR XZR.
2394  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2395  .addReg(AArch64::XZR)
2396  .addReg(SrcReg, getKillRegState(KillSrc));
2397  }
2398  return;
2399  }
2400 
2401  // Copy a DDDD register quad by copying the individual sub-registers.
2402  if (AArch64::DDDDRegClass.contains(DestReg) &&
2403  AArch64::DDDDRegClass.contains(SrcReg)) {
2404  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2405  AArch64::dsub2, AArch64::dsub3};
2406  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2407  Indices);
2408  return;
2409  }
2410 
2411  // Copy a DDD register triple by copying the individual sub-registers.
2412  if (AArch64::DDDRegClass.contains(DestReg) &&
2413  AArch64::DDDRegClass.contains(SrcReg)) {
2414  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2415  AArch64::dsub2};
2416  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2417  Indices);
2418  return;
2419  }
2420 
2421  // Copy a DD register pair by copying the individual sub-registers.
2422  if (AArch64::DDRegClass.contains(DestReg) &&
2423  AArch64::DDRegClass.contains(SrcReg)) {
2424  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2425  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2426  Indices);
2427  return;
2428  }
2429 
2430  // Copy a QQQQ register quad by copying the individual sub-registers.
2431  if (AArch64::QQQQRegClass.contains(DestReg) &&
2432  AArch64::QQQQRegClass.contains(SrcReg)) {
2433  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2434  AArch64::qsub2, AArch64::qsub3};
2435  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2436  Indices);
2437  return;
2438  }
2439 
2440  // Copy a QQQ register triple by copying the individual sub-registers.
2441  if (AArch64::QQQRegClass.contains(DestReg) &&
2442  AArch64::QQQRegClass.contains(SrcReg)) {
2443  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2444  AArch64::qsub2};
2445  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2446  Indices);
2447  return;
2448  }
2449 
2450  // Copy a QQ register pair by copying the individual sub-registers.
2451  if (AArch64::QQRegClass.contains(DestReg) &&
2452  AArch64::QQRegClass.contains(SrcReg)) {
2453  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2454  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2455  Indices);
2456  return;
2457  }
2458 
2459  if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
2460  AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
2461  static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
2462  copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
2463  AArch64::XZR, Indices);
2464  return;
2465  }
2466 
2467  if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
2468  AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
2469  static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
2470  copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
2471  AArch64::WZR, Indices);
2472  return;
2473  }
2474 
2475  if (AArch64::FPR128RegClass.contains(DestReg) &&
2476  AArch64::FPR128RegClass.contains(SrcReg)) {
2477  if (Subtarget.hasNEON()) {
2478  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2479  .addReg(SrcReg)
2480  .addReg(SrcReg, getKillRegState(KillSrc));
2481  } else {
2482  BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2483  .addReg(AArch64::SP, RegState::Define)
2484  .addReg(SrcReg, getKillRegState(KillSrc))
2485  .addReg(AArch64::SP)
2486  .addImm(-16);
2487  BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2488  .addReg(AArch64::SP, RegState::Define)
2489  .addReg(DestReg, RegState::Define)
2490  .addReg(AArch64::SP)
2491  .addImm(16);
2492  }
2493  return;
2494  }
2495 
2496  if (AArch64::FPR64RegClass.contains(DestReg) &&
2497  AArch64::FPR64RegClass.contains(SrcReg)) {
2498  if (Subtarget.hasNEON()) {
2499  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2500  &AArch64::FPR128RegClass);
2501  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2502  &AArch64::FPR128RegClass);
2503  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2504  .addReg(SrcReg)
2505  .addReg(SrcReg, getKillRegState(KillSrc));
2506  } else {
2507  BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2508  .addReg(SrcReg, getKillRegState(KillSrc));
2509  }
2510  return;
2511  }
2512 
2513  if (AArch64::FPR32RegClass.contains(DestReg) &&
2514  AArch64::FPR32RegClass.contains(SrcReg)) {
2515  if (Subtarget.hasNEON()) {
2516  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2517  &AArch64::FPR128RegClass);
2518  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2519  &AArch64::FPR128RegClass);
2520  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2521  .addReg(SrcReg)
2522  .addReg(SrcReg, getKillRegState(KillSrc));
2523  } else {
2524  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2525  .addReg(SrcReg, getKillRegState(KillSrc));
2526  }
2527  return;
2528  }
2529 
2530  if (AArch64::FPR16RegClass.contains(DestReg) &&
2531  AArch64::FPR16RegClass.contains(SrcReg)) {
2532  if (Subtarget.hasNEON()) {
2533  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2534  &AArch64::FPR128RegClass);
2535  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2536  &AArch64::FPR128RegClass);
2537  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2538  .addReg(SrcReg)
2539  .addReg(SrcReg, getKillRegState(KillSrc));
2540  } else {
2541  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2542  &AArch64::FPR32RegClass);
2543  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2544  &AArch64::FPR32RegClass);
2545  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2546  .addReg(SrcReg, getKillRegState(KillSrc));
2547  }
2548  return;
2549  }
2550 
2551  if (AArch64::FPR8RegClass.contains(DestReg) &&
2552  AArch64::FPR8RegClass.contains(SrcReg)) {
2553  if (Subtarget.hasNEON()) {
2554  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2555  &AArch64::FPR128RegClass);
2556  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2557  &AArch64::FPR128RegClass);
2558  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2559  .addReg(SrcReg)
2560  .addReg(SrcReg, getKillRegState(KillSrc));
2561  } else {
2562  DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2563  &AArch64::FPR32RegClass);
2564  SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2565  &AArch64::FPR32RegClass);
2566  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2567  .addReg(SrcReg, getKillRegState(KillSrc));
2568  }
2569  return;
2570  }
2571 
2572  // Copies between GPR64 and FPR64.
2573  if (AArch64::FPR64RegClass.contains(DestReg) &&
2574  AArch64::GPR64RegClass.contains(SrcReg)) {
2575  BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2576  .addReg(SrcReg, getKillRegState(KillSrc));
2577  return;
2578  }
2579  if (AArch64::GPR64RegClass.contains(DestReg) &&
2580  AArch64::FPR64RegClass.contains(SrcReg)) {
2581  BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2582  .addReg(SrcReg, getKillRegState(KillSrc));
2583  return;
2584  }
2585  // Copies between GPR32 and FPR32.
2586  if (AArch64::FPR32RegClass.contains(DestReg) &&
2587  AArch64::GPR32RegClass.contains(SrcReg)) {
2588  BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2589  .addReg(SrcReg, getKillRegState(KillSrc));
2590  return;
2591  }
2592  if (AArch64::GPR32RegClass.contains(DestReg) &&
2593  AArch64::FPR32RegClass.contains(SrcReg)) {
2594  BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2595  .addReg(SrcReg, getKillRegState(KillSrc));
2596  return;
2597  }
2598 
2599  if (DestReg == AArch64::NZCV) {
2600  assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2601  BuildMI(MBB, I, DL, get(AArch64::MSR))
2602  .addImm(AArch64SysReg::NZCV)
2603  .addReg(SrcReg, getKillRegState(KillSrc))
2604  .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2605  return;
2606  }
2607 
2608  if (SrcReg == AArch64::NZCV) {
2609  assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2610  BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2611  .addImm(AArch64SysReg::NZCV)
2612  .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2613  return;
2614  }
2615 
2616  llvm_unreachable("unimplemented reg-to-reg copy");
2617 }
2618 
2619 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
2620  MachineBasicBlock &MBB,
2621  MachineBasicBlock::iterator InsertBefore,
2622  const MCInstrDesc &MCID,
2623  unsigned SrcReg, bool IsKill,
2624  unsigned SubIdx0, unsigned SubIdx1, int FI,
2625  MachineMemOperand *MMO) {
2626  unsigned SrcReg0 = SrcReg;
2627  unsigned SrcReg1 = SrcReg;
2628  if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
2629  SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
2630  SubIdx0 = 0;
2631  SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
2632  SubIdx1 = 0;
2633  }
2634  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2635  .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
2636  .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
2637  .addFrameIndex(FI)
2638  .addImm(0)
2639  .addMemOperand(MMO);
2640 }
2641 
2642 void AArch64InstrInfo::storeRegToStackSlot(
2643  MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2644  bool isKill, int FI, const TargetRegisterClass *RC,
2645  const TargetRegisterInfo *TRI) const {
2646  MachineFunction &MF = *MBB.getParent();
2647  MachineFrameInfo &MFI = MF.getFrameInfo();
2648  unsigned Align = MFI.getObjectAlignment(FI);
2649 
2650  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2651  MachineMemOperand *MMO = MF.getMachineMemOperand(
2652  PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2653  unsigned Opc = 0;
2654  bool Offset = true;
2655  switch (TRI->getSpillSize(*RC)) {
2656  case 1:
2657  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2658  Opc = AArch64::STRBui;
2659  break;
2660  case 2:
2661  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2662  Opc = AArch64::STRHui;
2663  break;
2664  case 4:
2665  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2666  Opc = AArch64::STRWui;
2667  if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2668  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2669  else
2670  assert(SrcReg != AArch64::WSP);
2671  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2672  Opc = AArch64::STRSui;
2673  break;
2674  case 8:
2675  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2676  Opc = AArch64::STRXui;
2677  if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2678  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2679  else
2680  assert(SrcReg != AArch64::SP);
2681  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
2682  Opc = AArch64::STRDui;
2683  } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
2684  storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2685  get(AArch64::STPWi), SrcReg, isKill,
2686  AArch64::sube32, AArch64::subo32, FI, MMO);
2687  return;
2688  }
2689  break;
2690  case 16:
2691  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2692  Opc = AArch64::STRQui;
2693  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2694  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2695  Opc = AArch64::ST1Twov1d;
2696  Offset = false;
2697  } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2698  storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2699  get(AArch64::STPXi), SrcReg, isKill,
2700  AArch64::sube64, AArch64::subo64, FI, MMO);
2701  return;
2702  }
2703  break;
2704  case 24:
2705  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2706  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2707  Opc = AArch64::ST1Threev1d;
2708  Offset = false;
2709  }
2710  break;
2711  case 32:
2712  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2713  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2714  Opc = AArch64::ST1Fourv1d;
2715  Offset = false;
2716  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2717  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2718  Opc = AArch64::ST1Twov2d;
2719  Offset = false;
2720  }
2721  break;
2722  case 48:
2723  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2724  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2725  Opc = AArch64::ST1Threev2d;
2726  Offset = false;
2727  }
2728  break;
2729  case 64:
2730  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2731  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2732  Opc = AArch64::ST1Fourv2d;
2733  Offset = false;
2734  }
2735  break;
2736  }
2737  assert(Opc && "Unknown register class");
2738 
2739  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
2740  .addReg(SrcReg, getKillRegState(isKill))
2741  .addFrameIndex(FI);
2742 
2743  if (Offset)
2744  MI.addImm(0);
2745  MI.addMemOperand(MMO);
2746 }
2747 
2748 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
2749  MachineBasicBlock &MBB,
2750  MachineBasicBlock::iterator InsertBefore,
2751  const MCInstrDesc &MCID,
2752  unsigned DestReg, unsigned SubIdx0,
2753  unsigned SubIdx1, int FI,
2754  MachineMemOperand *MMO) {
2755  unsigned DestReg0 = DestReg;
2756  unsigned DestReg1 = DestReg;
2757  bool IsUndef = true;
2758  if (TargetRegisterInfo::isPhysicalRegister(DestReg)) {
2759  DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
2760  SubIdx0 = 0;
2761  DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
2762  SubIdx1 = 0;
2763  IsUndef = false;
2764  }
2765  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2766  .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
2767  .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
2768  .addFrameIndex(FI)
2769  .addImm(0)
2770  .addMemOperand(MMO);
2771 }
2772 
2773 void AArch64InstrInfo::loadRegFromStackSlot(
2774  MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2775  int FI, const TargetRegisterClass *RC,
2776  const TargetRegisterInfo *TRI) const {
2777  MachineFunction &MF = *MBB.getParent();
2778  MachineFrameInfo &MFI = MF.getFrameInfo();
2779  unsigned Align = MFI.getObjectAlignment(FI);
2780  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2781  MachineMemOperand *MMO = MF.getMachineMemOperand(
2782  PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2783 
2784  unsigned Opc = 0;
2785  bool Offset = true;
2786  switch (TRI->getSpillSize(*RC)) {
2787  case 1:
2788  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2789  Opc = AArch64::LDRBui;
2790  break;
2791  case 2:
2792  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2793  Opc = AArch64::LDRHui;
2794  break;
2795  case 4:
2796  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2797  Opc = AArch64::LDRWui;
2798  if (TargetRegisterInfo::isVirtualRegister(DestReg))
2799  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2800  else
2801  assert(DestReg != AArch64::WSP);
2802  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2803  Opc = AArch64::LDRSui;
2804  break;
2805  case 8:
2806  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2807  Opc = AArch64::LDRXui;
2808  if (TargetRegisterInfo::isVirtualRegister(DestReg))
2809  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2810  else
2811  assert(DestReg != AArch64::SP);
2812  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
2813  Opc = AArch64::LDRDui;
2814  } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
2815  loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
2816  get(AArch64::LDPWi), DestReg, AArch64::sube32,
2817  AArch64::subo32, FI, MMO);
2818  return;
2819  }
2820  break;
2821  case 16:
2822  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2823  Opc = AArch64::LDRQui;
2824  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2825  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2826  Opc = AArch64::LD1Twov1d;
2827  Offset = false;
2828  } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2829  loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
2830  get(AArch64::LDPXi), DestReg, AArch64::sube64,
2831  AArch64::subo64, FI, MMO);
2832  return;
2833  }
2834  break;
2835  case 24:
2836  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2837  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2838  Opc = AArch64::LD1Threev1d;
2839  Offset = false;
2840  }
2841  break;
2842  case 32:
2843  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2844  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2845  Opc = AArch64::LD1Fourv1d;
2846  Offset = false;
2847  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2848  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2849  Opc = AArch64::LD1Twov2d;
2850  Offset = false;
2851  }
2852  break;
2853  case 48:
2854  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2855  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2856  Opc = AArch64::LD1Threev2d;
2857  Offset = false;
2858  }
2859  break;
2860  case 64:
2861  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2862  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2863  Opc = AArch64::LD1Fourv2d;
2864  Offset = false;
2865  }
2866  break;
2867  }
2868  assert(Opc && "Unknown register class");
2869 
2870  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
2871  .addReg(DestReg, getDefRegState(true))
2872  .addFrameIndex(FI);
2873  if (Offset)
2874  MI.addImm(0);
2875  MI.addMemOperand(MMO);
2876 }
2877 
2878 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
2879  MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
2880  unsigned DestReg, unsigned SrcReg, int Offset,
2881  const TargetInstrInfo *TII,
2882  MachineInstr::MIFlag Flag, bool SetNZCV,
2883  bool NeedsWinCFI) {
2884  if (DestReg == SrcReg && Offset == 0)
2885  return;
2886 
2887  assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
2888  "SP increment/decrement not 16-byte aligned");
2889 
2890  bool isSub = Offset < 0;
2891  if (isSub)
2892  Offset = -Offset;
2893 
2894  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2895  // scratch register. If DestReg is a virtual register, use it as the
2896  // scratch register; otherwise, create a new virtual register (to be
2897  // replaced by the scavenger at the end of PEI). That case can be optimized
2898  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2899  // register can be loaded with offset%8 and the add/sub can use an extending
2900  // instruction with LSL#3.
2901  // Currently the function handles any offsets but generates a poor sequence
2902  // of code.
2903  // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2904 
2905  unsigned Opc;
2906  if (SetNZCV)
2907  Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2908  else
2909  Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2910  const unsigned MaxEncoding = 0xfff;
2911  const unsigned ShiftSize = 12;
2912  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
2913  while (((unsigned)Offset) >= (1 << ShiftSize)) {
2914  unsigned ThisVal;
2915  if (((unsigned)Offset) > MaxEncodableValue) {
2916  ThisVal = MaxEncodableValue;
2917  } else {
2918  ThisVal = Offset & MaxEncodableValue;
2919  }
2920  assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2921  "Encoding cannot handle value that big");
2922  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2923  .addReg(SrcReg)
2924  .addImm(ThisVal >> ShiftSize)
2925  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2926  .setMIFlag(Flag);
2927 
2928  if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP)
2929  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
2930  .addImm(ThisVal)
2931  .setMIFlag(Flag);
2932 
2933  SrcReg = DestReg;
2934  Offset -= ThisVal;
2935  if (Offset == 0)
2936  return;
2937  }
2938  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2939  .addReg(SrcReg)
2940  .addImm(Offset)
2941  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2942  .setMIFlag(Flag);
2943 
2944  if (NeedsWinCFI) {
2945  if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
2946  (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
2947  if (Offset == 0)
2948  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).
2949  setMIFlag(Flag);
2950  else
2951  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)).
2952  addImm(Offset).setMIFlag(Flag);
2953  } else if (DestReg == AArch64::SP) {
2954  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)).
2955  addImm(Offset).setMIFlag(Flag);
2956  }
2957  }
2958 }
2959 
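// Worked example (illustrative): a call such as
//   emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -4112, TII,
//                   MachineInstr::NoFlags);
// cannot encode 4112 in a single 12-bit immediate, so the loop above first
// emits "sub sp, sp, #1, lsl #12" for the 4096-byte part and then
// "sub sp, sp, #16" for the remainder.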
2960 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
2961  MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
2962  MachineBasicBlock::iterator InsertPt, int FrameIndex,
2963  LiveIntervals *LIS) const {
2964  // This is a bit of a hack. Consider this instruction:
2965  //
2966  // %0 = COPY %sp; GPR64all:%0
2967  //
2968  // We explicitly chose GPR64all for the virtual register so such a copy might
2969  // be eliminated by RegisterCoalescer. However, that may not be possible, and
2970  // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
2971  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
2972  //
2973  // To prevent that, we are going to constrain the %0 register class here.
2974  //
2975  // <rdar://problem/11522048>
2976  //
2977  if (MI.isFullCopy()) {
2978  unsigned DstReg = MI.getOperand(0).getReg();
2979  unsigned SrcReg = MI.getOperand(1).getReg();
2980  if (SrcReg == AArch64::SP &&
2981  TargetRegisterInfo::isVirtualRegister(DstReg)) {
2982  MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
2983  return nullptr;
2984  }
2985  if (DstReg == AArch64::SP &&
2986  TargetRegisterInfo::isVirtualRegister(SrcReg)) {
2987  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2988  return nullptr;
2989  }
2990  }
2991 
2992  // Handle the case where a copy is being spilled or filled but the source
2993  // and destination register class don't match. For example:
2994  //
2995  // %0 = COPY %xzr; GPR64common:%0
2996  //
2997  // In this case we can still safely fold away the COPY and generate the
2998  // following spill code:
2999  //
3000  // STRXui %xzr, %stack.0
3001  //
3002  // This also eliminates spilled cross register class COPYs (e.g. between x and
3003  // d regs) of the same size. For example:
3004  //
3005  // %0 = COPY %1; GPR64:%0, FPR64:%1
3006  //
3007  // will be filled as
3008  //
3009  // LDRDui %0, fi<#0>
3010  //
3011  // instead of
3012  //
3013  // LDRXui %Temp, fi<#0>
3014  // %0 = FMOV %Temp
3015  //
3016  if (MI.isCopy() && Ops.size() == 1 &&
3017  // Make sure we're only folding the explicit COPY defs/uses.
3018  (Ops[0] == 0 || Ops[0] == 1)) {
3019  bool IsSpill = Ops[0] == 0;
3020  bool IsFill = !IsSpill;
3021  const TargetRegisterInfo &TRI = getRegisterInfo();
3022  const MachineRegisterInfo &MRI = MF.getRegInfo();
3023  MachineBasicBlock &MBB = *MI.getParent();
3024  const MachineOperand &DstMO = MI.getOperand(0);
3025  const MachineOperand &SrcMO = MI.getOperand(1);
3026  unsigned DstReg = DstMO.getReg();
3027  unsigned SrcReg = SrcMO.getReg();
3028  // This is slightly expensive to compute for physical regs since
3029  // getMinimalPhysRegClass is slow.
3030  auto getRegClass = [&](unsigned Reg) {
3031  return TargetRegisterInfo::isVirtualRegister(Reg)
3032  ? MRI.getRegClass(Reg)
3033  : TRI.getMinimalPhysRegClass(Reg);
3034  };
3035 
3036  if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
3037  assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
3038  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
3039  "Mismatched register size in non subreg COPY");
3040  if (IsSpill)
3041  storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
3042  getRegClass(SrcReg), &TRI);
3043  else
3044  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
3045  getRegClass(DstReg), &TRI);
3046  return &*--InsertPt;
3047  }
3048 
3049  // Handle cases like spilling def of:
3050  //
3051  // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
3052  //
3053  // where the physical register source can be widened and stored to the full
3054  // virtual reg destination stack slot, in this case producing:
3055  //
3056  // STRXui %xzr, %stack.0
3057  //
3058  if (IsSpill && DstMO.isUndef() &&
3059  TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
3060  assert(SrcMO.getSubReg() == 0 &&
3061  "Unexpected subreg on physical register");
3062  const TargetRegisterClass *SpillRC;
3063  unsigned SpillSubreg;
3064  switch (DstMO.getSubReg()) {
3065  default:
3066  SpillRC = nullptr;
3067  break;
3068  case AArch64::sub_32:
3069  case AArch64::ssub:
3070  if (AArch64::GPR32RegClass.contains(SrcReg)) {
3071  SpillRC = &AArch64::GPR64RegClass;
3072  SpillSubreg = AArch64::sub_32;
3073  } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
3074  SpillRC = &AArch64::FPR64RegClass;
3075  SpillSubreg = AArch64::ssub;
3076  } else
3077  SpillRC = nullptr;
3078  break;
3079  case AArch64::dsub:
3080  if (AArch64::FPR64RegClass.contains(SrcReg)) {
3081  SpillRC = &AArch64::FPR128RegClass;
3082  SpillSubreg = AArch64::dsub;
3083  } else
3084  SpillRC = nullptr;
3085  break;
3086  }
3087 
3088  if (SpillRC)
3089  if (unsigned WidenedSrcReg =
3090  TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
3091  storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
3092  FrameIndex, SpillRC, &TRI);
3093  return &*--InsertPt;
3094  }
3095  }
3096 
3097  // Handle cases like filling use of:
3098  //
3099  // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
3100  //
3101  // where we can load the full virtual reg source stack slot, into the subreg
3102  // destination, in this case producing:
3103  //
3104  // LDRWui %0:sub_32<def,read-undef>, %stack.0
3105  //
3106  if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
3107  const TargetRegisterClass *FillRC;
3108  switch (DstMO.getSubReg()) {
3109  default:
3110  FillRC = nullptr;
3111  break;
3112  case AArch64::sub_32:
3113  FillRC = &AArch64::GPR32RegClass;
3114  break;
3115  case AArch64::ssub:
3116  FillRC = &AArch64::FPR32RegClass;
3117  break;
3118  case AArch64::dsub:
3119  FillRC = &AArch64::FPR64RegClass;
3120  break;
3121  }
3122 
3123  if (FillRC) {
3124  assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
3125  TRI.getRegSizeInBits(*FillRC) &&
3126  "Mismatched regclass size on folded subreg COPY");
3127  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
3128  MachineInstr &LoadMI = *--InsertPt;
3129  MachineOperand &LoadDst = LoadMI.getOperand(0);
3130  assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
3131  LoadDst.setSubReg(DstMO.getSubReg());
3132  LoadDst.setIsUndef();
3133  return &LoadMI;
3134  }
3135  }
3136  }
3137 
3138  // Cannot fold.
3139  return nullptr;
3140 }
3141 
3142 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
3143  bool *OutUseUnscaledOp,
3144  unsigned *OutUnscaledOp,
3145  int *EmittableOffset) {
3146  int Scale = 1;
3147  bool IsSigned = false;
3148  // The ImmIdx should be changed case by case if it is not 2.
3149  unsigned ImmIdx = 2;
3150  unsigned UnscaledOp = 0;
3151  // Set output values in case of early exit.
3152  if (EmittableOffset)
3153  *EmittableOffset = 0;
3154  if (OutUseUnscaledOp)
3155  *OutUseUnscaledOp = false;
3156  if (OutUnscaledOp)
3157  *OutUnscaledOp = 0;
3158  switch (MI.getOpcode()) {
3159  default:
3160  llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
3161  // Vector spills/fills can't take an immediate offset.
3162  case AArch64::LD1Twov2d:
3163  case AArch64::LD1Threev2d:
3164  case AArch64::LD1Fourv2d:
3165  case AArch64::LD1Twov1d:
3166  case AArch64::LD1Threev1d:
3167  case AArch64::LD1Fourv1d:
3168  case AArch64::ST1Twov2d:
3169  case AArch64::ST1Threev2d:
3170  case AArch64::ST1Fourv2d:
3171  case AArch64::ST1Twov1d:
3172  case AArch64::ST1Threev1d:
3173  case AArch64::ST1Fourv1d:
3174  return AArch64FrameOffsetCannotUpdate;
3175  case AArch64::PRFMui:
3176  Scale = 8;
3177  UnscaledOp = AArch64::PRFUMi;
3178  break;
3179  case AArch64::LDRXui:
3180  Scale = 8;
3181  UnscaledOp = AArch64::LDURXi;
3182  break;
3183  case AArch64::LDRWui:
3184  Scale = 4;
3185  UnscaledOp = AArch64::LDURWi;
3186  break;
3187  case AArch64::LDRBui:
3188  Scale = 1;
3189  UnscaledOp = AArch64::LDURBi;
3190  break;
3191  case AArch64::LDRHui:
3192  Scale = 2;
3193  UnscaledOp = AArch64::LDURHi;
3194  break;
3195  case AArch64::LDRSui:
3196  Scale = 4;
3197  UnscaledOp = AArch64::LDURSi;
3198  break;
3199  case AArch64::LDRDui:
3200  Scale = 8;
3201  UnscaledOp = AArch64::LDURDi;
3202  break;
3203  case AArch64::LDRQui:
3204  Scale = 16;
3205  UnscaledOp = AArch64::LDURQi;
3206  break;
3207  case AArch64::LDRBBui:
3208  Scale = 1;
3209  UnscaledOp = AArch64::LDURBBi;
3210  break;
3211  case AArch64::LDRHHui:
3212  Scale = 2;
3213  UnscaledOp = AArch64::LDURHHi;
3214  break;
3215  case AArch64::LDRSBXui:
3216  Scale = 1;
3217  UnscaledOp = AArch64::LDURSBXi;
3218  break;
3219  case AArch64::LDRSBWui:
3220  Scale = 1;
3221  UnscaledOp = AArch64::LDURSBWi;
3222  break;
3223  case AArch64::LDRSHXui:
3224  Scale = 2;
3225  UnscaledOp = AArch64::LDURSHXi;
3226  break;
3227  case AArch64::LDRSHWui:
3228  Scale = 2;
3229  UnscaledOp = AArch64::LDURSHWi;
3230  break;
3231  case AArch64::LDRSWui:
3232  Scale = 4;
3233  UnscaledOp = AArch64::LDURSWi;
3234  break;
3235 
3236  case AArch64::STRXui:
3237  Scale = 8;
3238  UnscaledOp = AArch64::STURXi;
3239  break;
3240  case AArch64::STRWui:
3241  Scale = 4;
3242  UnscaledOp = AArch64::STURWi;
3243  break;
3244  case AArch64::STRBui:
3245  Scale = 1;
3246  UnscaledOp = AArch64::STURBi;
3247  break;
3248  case AArch64::STRHui:
3249  Scale = 2;
3250  UnscaledOp = AArch64::STURHi;
3251  break;
3252  case AArch64::STRSui:
3253  Scale = 4;
3254  UnscaledOp = AArch64::STURSi;
3255  break;
3256  case AArch64::STRDui:
3257  Scale = 8;
3258  UnscaledOp = AArch64::STURDi;
3259  break;
3260  case AArch64::STRQui:
3261  Scale = 16;
3262  UnscaledOp = AArch64::STURQi;
3263  break;
3264  case AArch64::STRBBui:
3265  Scale = 1;
3266  UnscaledOp = AArch64::STURBBi;
3267  break;
3268  case AArch64::STRHHui:
3269  Scale = 2;
3270  UnscaledOp = AArch64::STURHHi;
3271  break;
3272 
3273  case AArch64::LDPXi:
3274  case AArch64::LDPDi:
3275  case AArch64::STPXi:
3276  case AArch64::STPDi:
3277  case AArch64::LDNPXi:
3278  case AArch64::LDNPDi:
3279  case AArch64::STNPXi:
3280  case AArch64::STNPDi:
3281  ImmIdx = 3;
3282  IsSigned = true;
3283  Scale = 8;
3284  break;
3285  case AArch64::LDPQi:
3286  case AArch64::STPQi:
3287  case AArch64::LDNPQi:
3288  case AArch64::STNPQi:
3289  ImmIdx = 3;
3290  IsSigned = true;
3291  Scale = 16;
3292  break;
3293  case AArch64::LDPWi:
3294  case AArch64::LDPSi:
3295  case AArch64::STPWi:
3296  case AArch64::STPSi:
3297  case AArch64::LDNPWi:
3298  case AArch64::LDNPSi:
3299  case AArch64::STNPWi:
3300  case AArch64::STNPSi:
3301  ImmIdx = 3;
3302  IsSigned = true;
3303  Scale = 4;
3304  break;
3305 
3306  case AArch64::LDURXi:
3307  case AArch64::LDURWi:
3308  case AArch64::LDURBi:
3309  case AArch64::LDURHi:
3310  case AArch64::LDURSi:
3311  case AArch64::LDURDi:
3312  case AArch64::LDURQi:
3313  case AArch64::LDURHHi:
3314  case AArch64::LDURBBi:
3315  case AArch64::LDURSBXi:
3316  case AArch64::LDURSBWi:
3317  case AArch64::LDURSHXi:
3318  case AArch64::LDURSHWi:
3319  case AArch64::LDURSWi:
3320  case AArch64::STURXi:
3321  case AArch64::STURWi:
3322  case AArch64::STURBi:
3323  case AArch64::STURHi:
3324  case AArch64::STURSi:
3325  case AArch64::STURDi:
3326  case AArch64::STURQi:
3327  case AArch64::STURBBi:
3328  case AArch64::STURHHi:
3329  Scale = 1;
3330  break;
3331  }
3332 
3333  Offset += MI.getOperand(ImmIdx).getImm() * Scale;
3334 
3335  bool useUnscaledOp = false;
3336  // If the offset doesn't match the scale, we rewrite the instruction to
3337  // use the unscaled instruction instead. Likewise, if we have a negative
3338  // offset (and have an unscaled op to use).
3339  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
3340  useUnscaledOp = true;
3341 
3342  // Use an unscaled addressing mode if the instruction has a negative offset
3343  // (or if the instruction is already using an unscaled addressing mode).
3344  unsigned MaskBits;
3345  if (IsSigned) {
3346  // ldp/stp instructions.
3347  MaskBits = 7;
3348  Offset /= Scale;
3349  } else if (UnscaledOp == 0 || useUnscaledOp) {
3350  MaskBits = 9;
3351  IsSigned = true;
3352  Scale = 1;
3353  } else {
3354  MaskBits = 12;
3355  IsSigned = false;
3356  Offset /= Scale;
3357  }
3358 
3359  // Attempt to fold address computation.
3360  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
3361  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
3362  if (Offset >= MinOff && Offset <= MaxOff) {
3363  if (EmittableOffset)
3364  *EmittableOffset = Offset;
3365  Offset = 0;
3366  } else {
3367  int NewOff = Offset < 0 ? MinOff : MaxOff;
3368  if (EmittableOffset)
3369  *EmittableOffset = NewOff;
3370  Offset = (Offset - NewOff) * Scale;
3371  }
3372  if (OutUseUnscaledOp)
3373  *OutUseUnscaledOp = useUnscaledOp;
3374  if (OutUnscaledOp)
3375  *OutUnscaledOp = UnscaledOp;
3376  return AArch64FrameOffsetCanUpdate |
3377  (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
3378 }
3379 
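// Worked example (illustrative operands): for an STRXui whose frame index
// resolves to a byte offset of 32776, the scaled immediate would be
// 32776 / 8 == 4097, one past the 4095 maximum.  The routine then reports an
// emittable offset of 4095 elements (32760 bytes) and leaves the remaining
// 16 bytes in Offset for the caller to materialize separately.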
3380 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3381  unsigned FrameReg, int &Offset,
3382  const AArch64InstrInfo *TII) {
3383  unsigned Opcode = MI.getOpcode();
3384  unsigned ImmIdx = FrameRegIdx + 1;
3385 
3386  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3387  Offset += MI.getOperand(ImmIdx).getImm();
3388  emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3389  MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3390  MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3391  MI.eraseFromParent();
3392  Offset = 0;
3393  return true;
3394  }
3395 
3396  int NewOffset;
3397  unsigned UnscaledOp;
3398  bool UseUnscaledOp;
3399  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3400  &UnscaledOp, &NewOffset);
3401  if (Status & AArch64FrameOffsetCanUpdate) {
3402  if (Status & AArch64FrameOffsetIsLegal)
3403  // Replace the FrameIndex with FrameReg.
3404  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3405  if (UseUnscaledOp)
3406  MI.setDesc(TII->get(UnscaledOp));
3407 
3408  MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3409  return Offset == 0;
3410  }
3411 
3412  return false;
3413 }
3414 
3415 void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
3416  NopInst.setOpcode(AArch64::HINT);
3417  NopInst.addOperand(MCOperand::createImm(0));
3418 }
3419 
3420 // AArch64 supports MachineCombiner.
3421 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3422 
3423 // True when Opc sets flags
3424 static bool isCombineInstrSettingFlag(unsigned Opc) {
3425  switch (Opc) {
3426  case AArch64::ADDSWrr:
3427  case AArch64::ADDSWri:
3428  case AArch64::ADDSXrr:
3429  case AArch64::ADDSXri:
3430  case AArch64::SUBSWrr:
3431  case AArch64::SUBSXrr:
3432  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3433  case AArch64::SUBSWri:
3434  case AArch64::SUBSXri:
3435  return true;
3436  default:
3437  break;
3438  }
3439  return false;
3440 }
3441 
3442 // 32b Opcodes that can be combined with a MUL
3443 static bool isCombineInstrCandidate32(unsigned Opc) {
3444  switch (Opc) {
3445  case AArch64::ADDWrr:
3446  case AArch64::ADDWri:
3447  case AArch64::SUBWrr:
3448  case AArch64::ADDSWrr:
3449  case AArch64::ADDSWri:
3450  case AArch64::SUBSWrr:
3451  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3452  case AArch64::SUBWri:
3453  case AArch64::SUBSWri:
3454  return true;
3455  default:
3456  break;
3457  }
3458  return false;
3459 }
3460 
3461 // 64b Opcodes that can be combined with a MUL
3462 static bool isCombineInstrCandidate64(unsigned Opc) {
3463  switch (Opc) {
3464  case AArch64::ADDXrr:
3465  case AArch64::ADDXri:
3466  case AArch64::SUBXrr:
3467  case AArch64::ADDSXrr:
3468  case AArch64::ADDSXri:
3469  case AArch64::SUBSXrr:
3470  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3471  case AArch64::SUBXri:
3472  case AArch64::SUBSXri:
3473  return true;
3474  default:
3475  break;
3476  }
3477  return false;
3478 }
3479 
3480 // FP Opcodes that can be combined with a FMUL
3481 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3482  switch (Inst.getOpcode()) {
3483  default:
3484  break;
3485  case AArch64::FADDSrr:
3486  case AArch64::FADDDrr:
3487  case AArch64::FADDv2f32:
3488  case AArch64::FADDv2f64:
3489  case AArch64::FADDv4f32:
3490  case AArch64::FSUBSrr:
3491  case AArch64::FSUBDrr:
3492  case AArch64::FSUBv2f32:
3493  case AArch64::FSUBv2f64:
3494  case AArch64::FSUBv4f32:
3495  TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3496  return (Options.UnsafeFPMath ||
3497  Options.AllowFPOpFusion == FPOpFusion::Fast);
3498  }
3499  return false;
3500 }
3501 
3502 // Opcodes that can be combined with a MUL
3503 static bool isCombineInstrCandidate(unsigned Opc) {
3504  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3505 }
3506 
3507 //
3508 // Utility routine that checks if \param MO is defined by an
3509 // \param CombineOpc instruction in the basic block \param MBB
3510 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3511  unsigned CombineOpc, unsigned ZeroReg = 0,
3512  bool CheckZeroReg = false) {
3513  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3514  MachineInstr *MI = nullptr;
3515 
3516  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
3517  MI = MRI.getUniqueVRegDef(MO.getReg());
3518  // And it needs to be in the trace (otherwise, it won't have a depth).
3519  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3520  return false;
3521  // Must only be used by the user we combine with.
3522  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3523  return false;
3524 
3525  if (CheckZeroReg) {
3526  assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3527  MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3528  MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
3529  // The third input reg must be zero.
3530  if (MI->getOperand(3).getReg() != ZeroReg)
3531  return false;
3532  }
3533 
3534  return true;
3535 }
3536 
3537 //
3538 // Is \param MO defined by an integer multiply and can be combined?
3539 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3540  unsigned MulOpc, unsigned ZeroReg) {
3541  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3542 }
3543 
3544 //
3545 // Is \param MO defined by a floating-point multiply and can be combined?
3546 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3547  unsigned MulOpc) {
3548  return canCombine(MBB, MO, MulOpc);
3549 }
3550 
3551 // TODO: There are many more machine instruction opcodes to match:
3552 // 1. Other data types (integer, vectors)
3553 // 2. Other math / logic operations (xor, or)
3554 // 3. Other forms of the same operation (intrinsics and other variants)
3555 bool AArch64InstrInfo::isAssociativeAndCommutative(
3556  const MachineInstr &Inst) const {
3557  switch (Inst.getOpcode()) {
3558  case AArch64::FADDDrr:
3559  case AArch64::FADDSrr:
3560  case AArch64::FADDv2f32:
3561  case AArch64::FADDv2f64:
3562  case AArch64::FADDv4f32:
3563  case AArch64::FMULDrr:
3564  case AArch64::FMULSrr:
3565  case AArch64::FMULX32:
3566  case AArch64::FMULX64:
3567  case AArch64::FMULXv2f32:
3568  case AArch64::FMULXv2f64:
3569  case AArch64::FMULXv4f32:
3570  case AArch64::FMULv2f32:
3571  case AArch64::FMULv2f64:
3572  case AArch64::FMULv4f32:
3573  return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3574  default:
3575  return false;
3576  }
3577 }
3578 
3579 /// Find instructions that can be turned into madd.
3580 static bool getMaddPatterns(MachineInstr &Root,
3581  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3582  unsigned Opc = Root.getOpcode();
3583  MachineBasicBlock &MBB = *Root.getParent();
3584  bool Found = false;
3585 
3586  if (!isCombineInstrCandidate(Opc))
3587  return false;
3588  if (isCombineInstrSettingFlag(Opc)) {
3589  int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3590  // When NZCV is live bail out.
3591  if (Cmp_NZCV == -1)
3592  return false;
3593  unsigned NewOpc = convertToNonFlagSettingOpc(Root);
3594  // When opcode can't change bail out.
3595  // CHECKME: do we miss any cases for opcode conversion?
3596  if (NewOpc == Opc)
3597  return false;
3598  Opc = NewOpc;
3599  }
3600 
3601  switch (Opc) {
3602  default:
3603  break;
3604  case AArch64::ADDWrr:
3605  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3606  "ADDWrr does not have register operands");
3607  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3608  AArch64::WZR)) {
3609  Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
3610  Found = true;
3611  }
3612  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3613  AArch64::WZR)) {
3614  Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
3615  Found = true;
3616  }
3617  break;
3618  case AArch64::ADDXrr:
3619  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3620  AArch64::XZR)) {
3621  Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
3622  Found = true;
3623  }
3624  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3625  AArch64::XZR)) {
3626  Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
3627  Found = true;
3628  }
3629  break;
3630  case AArch64::SUBWrr:
3631  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3632  AArch64::WZR)) {
3634  Found = true;
3635  }
3636  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3637  AArch64::WZR)) {
3639  Found = true;
3640  }
3641  break;
3642  case AArch64::SUBXrr:
3643  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3644  AArch64::XZR)) {
3646  Found = true;
3647  }
3648  if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3649  AArch64::XZR)) {
3651  Found = true;
3652  }
3653  break;
3654  case AArch64::ADDWri:
3655  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3656  AArch64::WZR)) {
3658  Found = true;
3659  }
3660  break;
3661  case AArch64::ADDXri:
3662  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3663  AArch64::XZR)) {
3665  Found = true;
3666  }
3667  break;
3668  case AArch64::SUBWri:
3669  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3670  AArch64::WZR)) {
3672  Found = true;
3673  }
3674  break;
3675  case AArch64::SUBXri:
3676  if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3677  AArch64::XZR)) {
3679  Found = true;
3680  }
3681  break;
3682  }
3683  return Found;
3684 }
3685 /// Floating-Point Support
3686 
3687 /// Find instructions that can be turned into madd.
3688 static bool getFMAPatterns(MachineInstr &Root,
3689                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3690 
3691  if (!isCombineInstrCandidateFP(Root))
3692  return false;
3693 
3694  MachineBasicBlock &MBB = *Root.getParent();
3695  bool Found = false;
3696 
3697  switch (Root.getOpcode()) {
3698  default:
3699  assert(false && "Unsupported FP instruction in combiner\n");
3700  break;
3701  case AArch64::FADDSrr:
3702  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3703  "FADDWrr does not have register operands");
3704  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3706  Found = true;
3707  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3708  AArch64::FMULv1i32_indexed)) {
3710  Found = true;
3711  }
3712  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3714  Found = true;
3715  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3716  AArch64::FMULv1i32_indexed)) {
3718  Found = true;
3719  }
3720  break;
3721  case AArch64::FADDDrr:
3722  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3724  Found = true;
3725  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3726  AArch64::FMULv1i64_indexed)) {
3728  Found = true;
3729  }
3730  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3732  Found = true;
3733  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3734  AArch64::FMULv1i64_indexed)) {
3736  Found = true;
3737  }
3738  break;
3739  case AArch64::FADDv2f32:
3740  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3741  AArch64::FMULv2i32_indexed)) {
3743  Found = true;
3744  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3745  AArch64::FMULv2f32)) {
3747  Found = true;
3748  }
3749  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3750  AArch64::FMULv2i32_indexed)) {
3752  Found = true;
3753  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3754  AArch64::FMULv2f32)) {
3756  Found = true;
3757  }
3758  break;
3759  case AArch64::FADDv2f64:
3760  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3761  AArch64::FMULv2i64_indexed)) {
3763  Found = true;
3764  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3765  AArch64::FMULv2f64)) {
3767  Found = true;
3768  }
3769  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3770  AArch64::FMULv2i64_indexed)) {
3772  Found = true;
3773  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3774  AArch64::FMULv2f64)) {
3776  Found = true;
3777  }
3778  break;
3779  case AArch64::FADDv4f32:
3780  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3781  AArch64::FMULv4i32_indexed)) {
3783  Found = true;
3784  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3785  AArch64::FMULv4f32)) {
3787  Found = true;
3788  }
3789  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3790  AArch64::FMULv4i32_indexed)) {
3792  Found = true;
3793  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3794  AArch64::FMULv4f32)) {
3796  Found = true;
3797  }
3798  break;
3799 
3800  case AArch64::FSUBSrr:
3801  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3803  Found = true;
3804  }
3805  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3807  Found = true;
3808  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3809  AArch64::FMULv1i32_indexed)) {
3811  Found = true;
3812  }
3813  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
3815  Found = true;
3816  }
3817  break;
3818  case AArch64::FSUBDrr:
3819  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3821  Found = true;
3822  }
3823  if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3825  Found = true;
3826  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3827  AArch64::FMULv1i64_indexed)) {
3829  Found = true;
3830  }
3831  if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
3833  Found = true;
3834  }
3835  break;
3836  case AArch64::FSUBv2f32:
3837  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3838  AArch64::FMULv2i32_indexed)) {
3840  Found = true;
3841  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3842  AArch64::FMULv2f32)) {
3844  Found = true;
3845  }
3846  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3847  AArch64::FMULv2i32_indexed)) {
3849  Found = true;
3850  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3851  AArch64::FMULv2f32)) {
3853  Found = true;
3854  }
3855  break;
3856  case AArch64::FSUBv2f64:
3857  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3858  AArch64::FMULv2i64_indexed)) {
3860  Found = true;
3861  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3862  AArch64::FMULv2f64)) {
3864  Found = true;
3865  }
3866  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3867  AArch64::FMULv2i64_indexed)) {
3869  Found = true;
3870  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3871  AArch64::FMULv2f64)) {
3873  Found = true;
3874  }
3875  break;
3876  case AArch64::FSUBv4f32:
3877  if (canCombineWithFMUL(MBB, Root.getOperand(2),
3878  AArch64::FMULv4i32_indexed)) {
3880  Found = true;
3881  } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3882  AArch64::FMULv4f32)) {
3884  Found = true;
3885  }
3886  if (canCombineWithFMUL(MBB, Root.getOperand(1),
3887  AArch64::FMULv4i32_indexed)) {
3889  Found = true;
3890  } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3891  AArch64::FMULv4f32)) {
3893  Found = true;
3894  }
3895  break;
3896  }
3897  return Found;
3898 }
3899 
3900 /// Return true when a code sequence can improve throughput. It
3901 /// should be called only for instructions in loops.
3902 /// \param Pattern - combiner pattern
3903 bool AArch64InstrInfo::isThroughputPattern(
3904     MachineCombinerPattern Pattern) const {
3905  switch (Pattern) {
3906  default:
3907  break;
3942  return true;
3943  } // end switch (Pattern)
3944  return false;
3945 }
3946 /// Return true when there is potentially a faster code sequence for an
3947 /// instruction chain ending in \p Root. All potential patterns are listed in
3948 /// the \p Pattern vector. Pattern should be sorted in priority order since the
3949 /// pattern evaluator stops checking as soon as it finds a faster sequence.
3950 
3951 bool AArch64InstrInfo::getMachineCombinerPatterns(
3952     MachineInstr &Root,
3953  SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
3954  // Integer patterns
3955  if (getMaddPatterns(Root, Patterns))
3956  return true;
3957  // Floating point patterns
3958  if (getFMAPatterns(Root, Patterns))
3959  return true;
3960 
3961  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
3962 }
3963 
3965 /// genFusedMultiply - Generate fused multiply instructions.
3966 /// This function supports both integer and floating point instructions.
3967 /// A typical example:
3968 /// F|MUL I=A,B,0
3969 /// F|ADD R,I,C
3970 /// ==> F|MADD R,A,B,C
3971 /// \param MF Containing MachineFunction
3972 /// \param MRI Register information
3973 /// \param TII Target information
3974 /// \param Root is the F|ADD instruction
3975 /// \param [out] InsInstrs is a vector of machine instructions and will
3976 /// contain the generated madd instruction
3977 /// \param IdxMulOpd is index of operand in Root that is the result of
3978 /// the F|MUL. In the example above IdxMulOpd is 1.
3979 /// \param MaddOpc the opcode of the f|madd instruction
3980 /// \param RC Register class of operands
3981 /// \param kind Kind of FMA instruction (addressing mode) to be generated
3982 /// \param ReplacedAddend is the result register from the instruction
3983 /// replacing the non-combined operand, if any.
3984 static MachineInstr *
3985 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
3986                  const TargetInstrInfo *TII, MachineInstr &Root,
3987  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
3988  unsigned MaddOpc, const TargetRegisterClass *RC,
3989                  FMAInstKind kind = FMAInstKind::Default,
3990                  const unsigned *ReplacedAddend = nullptr) {
3991  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3992 
3993  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
3994  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3995  unsigned ResultReg = Root.getOperand(0).getReg();
3996  unsigned SrcReg0 = MUL->getOperand(1).getReg();
3997  bool Src0IsKill = MUL->getOperand(1).isKill();
3998  unsigned SrcReg1 = MUL->getOperand(2).getReg();
3999  bool Src1IsKill = MUL->getOperand(2).isKill();
4000 
4001  unsigned SrcReg2;
4002  bool Src2IsKill;
4003  if (ReplacedAddend) {
4004   // If we just generated a new addend, we must be its only use.
4005  SrcReg2 = *ReplacedAddend;
4006  Src2IsKill = true;
4007  } else {
4008  SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
4009  Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
4010  }
4011 
4013  MRI.constrainRegClass(ResultReg, RC);
4015  MRI.constrainRegClass(SrcReg0, RC);
4017  MRI.constrainRegClass(SrcReg1, RC);
4019  MRI.constrainRegClass(SrcReg2, RC);
4020 
4021  MachineInstrBuilder MIB;
4022  if (kind == FMAInstKind::Default)
4023  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4024  .addReg(SrcReg0, getKillRegState(Src0IsKill))
4025  .addReg(SrcReg1, getKillRegState(Src1IsKill))
4026  .addReg(SrcReg2, getKillRegState(Src2IsKill));
4027  else if (kind == FMAInstKind::Indexed)
4028  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4029  .addReg(SrcReg2, getKillRegState(Src2IsKill))
4030  .addReg(SrcReg0, getKillRegState(Src0IsKill))
4031  .addReg(SrcReg1, getKillRegState(Src1IsKill))
4032  .addImm(MUL->getOperand(3).getImm());
4033  else if (kind == FMAInstKind::Accumulator)
4034  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4035  .addReg(SrcReg2, getKillRegState(Src2IsKill))
4036  .addReg(SrcReg0, getKillRegState(Src0IsKill))
4037  .addReg(SrcReg1, getKillRegState(Src1IsKill));
4038  else
4039  assert(false && "Invalid FMA instruction kind \n");
4040   // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
4041  InsInstrs.push_back(MIB);
4042  return MUL;
4043 }
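// Worked example of the operand orders built above (registers illustrative):
//   Default:     FMADDDrrr %res, %a, %b, %c         ; %res = %a * %b + %c
//   Accumulator: FMLAv2f32 %res, %c, %a, %b         ; addend comes first
//   Indexed:     FMLAv2i32_indexed %res, %c, %a, %b, lane
// where the lane immediate is copied from operand 3 of the original FMUL.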
4044 
4045 /// genMaddR - Generate madd instruction and combine mul and add using
4046 /// an extra virtual register
4047 /// Example - an ADD intermediate needs to be stored in a register:
4048 /// MUL I=A,B,0
4049 /// ADD R,I,Imm
4050 /// ==> ORR V, ZR, Imm
4051 /// ==> MADD R,A,B,V
4052 /// \param MF Containing MachineFunction
4053 /// \param MRI Register information
4054 /// \param TII Target information
4055 /// \param Root is the ADD instruction
4056 /// \param [out] InsInstrs is a vector of machine instructions and will
4057 /// contain the generated madd instruction
4058 /// \param IdxMulOpd is index of operand in Root that is the result of
4059 /// the MUL. In the example above IdxMulOpd is 1.
4060 /// \param MaddOpc the opcode of the madd instruction
4061 /// \param VR is a virtual register that holds the value of an ADD operand
4062 /// (V in the example above).
4063 /// \param RC Register class of operands
4064 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
4065                               const TargetInstrInfo *TII, MachineInstr &Root,
4066                               SmallVectorImpl<MachineInstr *> &InsInstrs,
4067                               unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
4068  const TargetRegisterClass *RC) {
4069  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4070 
4071  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4072  unsigned ResultReg = Root.getOperand(0).getReg();
4073  unsigned SrcReg0 = MUL->getOperand(1).getReg();
4074  bool Src0IsKill = MUL->getOperand(1).isKill();
4075  unsigned SrcReg1 = MUL->getOperand(2).getReg();
4076  bool Src1IsKill = MUL->getOperand(2).isKill();
4077 
4079  MRI.constrainRegClass(ResultReg, RC);
4081  MRI.constrainRegClass(SrcReg0, RC);
4083  MRI.constrainRegClass(SrcReg1, RC);
4085  MRI.constrainRegClass(VR, RC);
4086 
4087  MachineInstrBuilder MIB =
4088  BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4089  .addReg(SrcReg0, getKillRegState(Src0IsKill))
4090  .addReg(SrcReg1, getKillRegState(Src1IsKill))
4091  .addReg(VR);
4092  // Insert the MADD
4093  InsInstrs.push_back(MIB);
4094  return MUL;
4095 }
4096 
4097 /// When getMachineCombinerPatterns() finds potential patterns,
4098 /// this function generates the instructions that could replace the
4099 /// original code sequence
4100 void AArch64InstrInfo::genAlternativeCodeSequence(
4101     MachineInstr &Root, MachineCombinerPattern Pattern,
4102     SmallVectorImpl<MachineInstr *> &InsInstrs,
4103     SmallVectorImpl<MachineInstr *> &DelInstrs,
4104     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
4105  MachineBasicBlock &MBB = *Root.getParent();
4106  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4107  MachineFunction &MF = *MBB.getParent();
4108  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4109 
4110  MachineInstr *MUL;
4111  const TargetRegisterClass *RC;
4112  unsigned Opc;
4113  switch (Pattern) {
4114  default:
4115  // Reassociate instructions.
4116  TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
4117  DelInstrs, InstrIdxForVirtReg);
4118  return;
4121  // MUL I=A,B,0
4122  // ADD R,I,C
4123  // ==> MADD R,A,B,C
4124  // --- Create(MADD);
4125  if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
4126  Opc = AArch64::MADDWrrr;
4127  RC = &AArch64::GPR32RegClass;
4128  } else {
4129  Opc = AArch64::MADDXrrr;
4130  RC = &AArch64::GPR64RegClass;
4131  }
4132  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4133  break;
4136  // MUL I=A,B,0
4137  // ADD R,C,I
4138  // ==> MADD R,A,B,C
4139  // --- Create(MADD);
4140  if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
4141  Opc = AArch64::MADDWrrr;
4142  RC = &AArch64::GPR32RegClass;
4143  } else {
4144  Opc = AArch64::MADDXrrr;
4145  RC = &AArch64::GPR64RegClass;
4146  }
4147  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4148  break;
4151  // MUL I=A,B,0
4152  // ADD R,I,Imm
4153  // ==> ORR V, ZR, Imm
4154  // ==> MADD R,A,B,V
4155  // --- Create(MADD);
4156  const TargetRegisterClass *OrrRC;
4157  unsigned BitSize, OrrOpc, ZeroReg;
4158  if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
4159  OrrOpc = AArch64::ORRWri;
4160  OrrRC = &AArch64::GPR32spRegClass;
4161  BitSize = 32;
4162  ZeroReg = AArch64::WZR;
4163  Opc = AArch64::MADDWrrr;
4164  RC = &AArch64::GPR32RegClass;
4165  } else {
4166  OrrOpc = AArch64::ORRXri;
4167  OrrRC = &AArch64::GPR64spRegClass;
4168  BitSize = 64;
4169  ZeroReg = AArch64::XZR;
4170  Opc = AArch64::MADDXrrr;
4171  RC = &AArch64::GPR64RegClass;
4172  }
4173  unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4174  uint64_t Imm = Root.getOperand(2).getImm();
4175 
4176  if (Root.getOperand(3).isImm()) {
4177  unsigned Val = Root.getOperand(3).getImm();
4178  Imm = Imm << Val;
4179  }
4180  uint64_t UImm = SignExtend64(Imm, BitSize);
4181  uint64_t Encoding;
4182  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4183  MachineInstrBuilder MIB1 =
4184  BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4185  .addReg(ZeroReg)
4186  .addImm(Encoding);
4187  InsInstrs.push_back(MIB1);
4188  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4189  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4190  }
4191  break;
4192  }
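  // Worked example for the immediate variant above (illustrative registers):
  //   %5 = MADDWrrr %1, %2, $wzr     ; the MUL
  //   %6 = ADDWri %5, 16, 0          ; immediate 16, no shift
  // becomes, because 16 is encodable as a logical immediate,
  //   %7 = ORRWri $wzr, <encoding of 16>
  //   %6 = MADDWrrr %1, %2, %7
  // If operand 3 carries a shift (e.g. #1, lsl #12), the immediate is shifted
  // (Imm << 12) before the encodability check, and no combine is performed
  // when the shifted value has no logical-immediate encoding.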
4195  // MUL I=A,B,0
4196  // SUB R,I, C
4197  // ==> SUB V, 0, C
4198  // ==> MADD R,A,B,V // = -C + A*B
4199  // --- Create(MADD);
4200  const TargetRegisterClass *SubRC;
4201  unsigned SubOpc, ZeroReg;
4202  if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
4203  SubOpc = AArch64::SUBWrr;
4204  SubRC = &AArch64::GPR32spRegClass;
4205  ZeroReg = AArch64::WZR;
4206  Opc = AArch64::MADDWrrr;
4207  RC = &AArch64::GPR32RegClass;
4208  } else {
4209  SubOpc = AArch64::SUBXrr;
4210  SubRC = &AArch64::GPR64spRegClass;
4211  ZeroReg = AArch64::XZR;
4212  Opc = AArch64::MADDXrrr;
4213  RC = &AArch64::GPR64RegClass;
4214  }
4215  unsigned NewVR = MRI.createVirtualRegister(SubRC);
4216  // SUB NewVR, 0, C
4217  MachineInstrBuilder MIB1 =
4218  BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
4219  .addReg(ZeroReg)
4220  .add(Root.getOperand(2));
4221  InsInstrs.push_back(MIB1);
4222  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4223  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4224  break;
4225  }
4228  // MUL I=A,B,0
4229  // SUB R,C,I
4230  // ==> MSUB R,A,B,C (computes C - A*B)
4231  // --- Create(MSUB);
4232  if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
4233  Opc = AArch64::MSUBWrrr;
4234  RC = &AArch64::GPR32RegClass;
4235  } else {
4236  Opc = AArch64::MSUBXrrr;
4237  RC = &AArch64::GPR64RegClass;
4238  }
4239  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4240  break;
4243  // MUL I=A,B,0
4244  // SUB R,I, Imm
4245  // ==> ORR V, ZR, -Imm
4246  // ==> MADD R,A,B,V // = -Imm + A*B
4247  // --- Create(MADD);
4248  const TargetRegisterClass *OrrRC;
4249  unsigned BitSize, OrrOpc, ZeroReg;
4250  if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
4251  OrrOpc = AArch64::ORRWri;
4252  OrrRC = &AArch64::GPR32spRegClass;
4253  BitSize = 32;
4254  ZeroReg = AArch64::WZR;
4255  Opc = AArch64::MADDWrrr;
4256  RC = &AArch64::GPR32RegClass;
4257  } else {
4258  OrrOpc = AArch64::ORRXri;
4259  OrrRC = &AArch64::GPR64spRegClass;
4260  BitSize = 64;
4261  ZeroReg = AArch64::XZR;
4262  Opc = AArch64::MADDXrrr;
4263  RC = &AArch64::GPR64RegClass;
4264  }
4265  unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4266  uint64_t Imm = Root.getOperand(2).getImm();
4267  if (Root.getOperand(3).isImm()) {
4268  unsigned Val = Root.getOperand(3).getImm();
4269  Imm = Imm << Val;
4270  }
4271  uint64_t UImm = SignExtend64(-Imm, BitSize);
4272  uint64_t Encoding;
4273  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4274  MachineInstrBuilder MIB1 =
4275  BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4276  .addReg(ZeroReg)
4277  .addImm(Encoding);
4278  InsInstrs.push_back(MIB1);
4279  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4280  MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4281  }
4282  break;
4283  }
4284  // Floating Point Support
4287  // MUL I=A,B,0
4288  // ADD R,I,C
4289  // ==> MADD R,A,B,C
4290  // --- Create(MADD);
4291  if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
4292  Opc = AArch64::FMADDSrrr;
4293  RC = &AArch64::FPR32RegClass;
4294  } else {
4295  Opc = AArch64::FMADDDrrr;
4296  RC = &AArch64::FPR64RegClass;
4297  }
4298  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4299  break;
4302  // FMUL I=A,B,0
4303  // FADD R,C,I
4304  // ==> FMADD R,A,B,C
4305  // --- Create(FMADD);
4306  if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
4307  Opc = AArch64::FMADDSrrr;
4308  RC = &AArch64::FPR32RegClass;
4309  } else {
4310  Opc = AArch64::FMADDDrrr;
4311  RC = &AArch64::FPR64RegClass;
4312  }
4313  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4314  break;
4315 
4317  Opc = AArch64::FMLAv1i32_indexed;
4318  RC = &AArch64::FPR32RegClass;
4319  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4321  break;
4323  Opc = AArch64::FMLAv1i32_indexed;
4324  RC = &AArch64::FPR32RegClass;
4325  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4327  break;
4328 
4330  Opc = AArch64::FMLAv1i64_indexed;
4331  RC = &AArch64::FPR64RegClass;
4332  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4334  break;
4336  Opc = AArch64::FMLAv1i64_indexed;
4337  RC = &AArch64::FPR64RegClass;
4338  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4340  break;
4341 
4344  RC = &AArch64::FPR64RegClass;
4346  Opc = AArch64::FMLAv2i32_indexed;
4347  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4349  } else {
4350  Opc = AArch64::FMLAv2f32;
4351  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4353  }
4354  break;
4357  RC = &AArch64::FPR64RegClass;
4359  Opc = AArch64::FMLAv2i32_indexed;
4360  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4362  } else {
4363  Opc = AArch64::FMLAv2f32;
4364  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4366  }
4367  break;
4368 
4371  RC = &AArch64::FPR128RegClass;
4373  Opc = AArch64::FMLAv2i64_indexed;
4374  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4376  } else {
4377  Opc = AArch64::FMLAv2f64;
4378  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4380  }
4381  break;
4384  RC = &AArch64::FPR128RegClass;
4386  Opc = AArch64::FMLAv2i64_indexed;
4387  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4389  } else {
4390  Opc = AArch64::FMLAv2f64;
4391  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4393  }
4394  break;
4395 
4398  RC = &AArch64::FPR128RegClass;
4400  Opc = AArch64::FMLAv4i32_indexed;
4401  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4403  } else {
4404  Opc = AArch64::FMLAv4f32;
4405  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4407  }
4408  break;
4409 
4412  RC = &AArch64::FPR128RegClass;
4414  Opc = AArch64::FMLAv4i32_indexed;
4415  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4417  } else {
4418  Opc = AArch64::FMLAv4f32;
4419  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4421  }
4422  break;
4423 
4426  // FMUL I=A,B,0
4427  // FSUB R,I,C
4428  // ==> FNMSUB R,A,B,C // = -C + A*B
4429  // --- Create(FNMSUB);
4430  if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
4431  Opc = AArch64::FNMSUBSrrr;
4432  RC = &AArch64::FPR32RegClass;
4433  } else {
4434  Opc = AArch64::FNMSUBDrrr;
4435  RC = &AArch64::FPR64RegClass;
4436  }
4437  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4438  break;
4439  }
4440 
4443  // FNMUL I=A,B,0
4444  // FSUB R,I,C
4445  // ==> FNMADD R,A,B,C // = -A*B - C
4446  // --- Create(FNMADD);
4447  if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
4448  Opc = AArch64::FNMADDSrrr;
4449  RC = &AArch64::FPR32RegClass;
4450  } else {
4451  Opc = AArch64::FNMADDDrrr;
4452  RC = &AArch64::FPR64RegClass;
4453  }
4454  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4455  break;
4456  }
4457 
4460  // FMUL I=A,B,0
4461  // FSUB R,C,I
4462  // ==> FMSUB R,A,B,C (computes C - A*B)
4463  // --- Create(FMSUB);
4464  if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
4465  Opc = AArch64::FMSUBSrrr;
4466  RC = &AArch64::FPR32RegClass;
4467  } else {
4468  Opc = AArch64::FMSUBDrrr;
4469  RC = &AArch64::FPR64RegClass;
4470  }
4471  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4472  break;
4473  }
4474 
4476  Opc = AArch64::FMLSv1i32_indexed;
4477  RC = &AArch64::FPR32RegClass;
4478  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4480  break;
4481 
4483  Opc = AArch64::FMLSv1i64_indexed;
4484  RC = &AArch64::FPR64RegClass;
4485  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4487  break;
4488 
4491  RC = &AArch64::FPR64RegClass;
4493  Opc = AArch64::FMLSv2i32_indexed;
4494  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4496  } else {
4497  Opc = AArch64::FMLSv2f32;
4498  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4500  }
4501  break;
4502 
4505  RC = &AArch64::FPR128RegClass;
4507  Opc = AArch64::FMLSv2i64_indexed;
4508  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4510  } else {
4511  Opc = AArch64::FMLSv2f64;
4512  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4514  }
4515  break;
4516 
4519  RC = &AArch64::FPR128RegClass;
4521  Opc = AArch64::FMLSv4i32_indexed;
4522  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4524  } else {
4525  Opc = AArch64::FMLSv4f32;
4526  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4528  }
4529  break;
4532  RC = &AArch64::FPR64RegClass;
4533  unsigned NewVR = MRI.createVirtualRegister(RC);
4534  MachineInstrBuilder MIB1 =
4535  BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
4536  .add(Root.getOperand(2));
4537  InsInstrs.push_back(MIB1);
4538  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4540  Opc = AArch64::FMLAv2i32_indexed;
4541  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4542  FMAInstKind::Indexed, &NewVR);
4543  } else {
4544  Opc = AArch64::FMLAv2f32;
4545  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4546  FMAInstKind::Accumulator, &NewVR);
4547  }
4548  break;
4549  }
4552  RC = &AArch64::FPR128RegClass;
4553  unsigned NewVR = MRI.createVirtualRegister(RC);
4554  MachineInstrBuilder MIB1 =
4555  BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
4556  .add(Root.getOperand(2));
4557  InsInstrs.push_back(MIB1);
4558  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4560  Opc = AArch64::FMLAv4i32_indexed;
4561  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4562  FMAInstKind::Indexed, &NewVR);
4563  } else {
4564  Opc = AArch64::FMLAv4f32;
4565  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4566  FMAInstKind::Accumulator, &NewVR);
4567  }
4568  break;
4569  }
4572  RC = &AArch64::FPR128RegClass;
4573  unsigned NewVR = MRI.createVirtualRegister(RC);
4574  MachineInstrBuilder MIB1 =
4575  BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
4576  .add(Root.getOperand(2));
4577  InsInstrs.push_back(MIB1);
4578  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4580  Opc = AArch64::FMLAv2i64_indexed;
4581  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4582  FMAInstKind::Indexed, &NewVR);
4583  } else {
4584  Opc = AArch64::FMLAv2f64;
4585  MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4586  FMAInstKind::Accumulator, &NewVR);
4587  }
4588  break;
4589  }
4590  } // end switch (Pattern)
4591  // Record MUL and ADD/SUB for deletion
4592  DelInstrs.push_back(MUL);
4593  DelInstrs.push_back(&Root);
4594 }
4595 
4596 /// Replace csincr-branch sequence by simple conditional branch
4597 ///
4598 /// Examples:
4599 /// 1. \code
4600 /// csinc w9, wzr, wzr, <condition code>
4601 /// tbnz w9, #0, 0x44
4602 /// \endcode
4603 /// to
4604 /// \code
4605 /// b.<inverted condition code>
4606 /// \endcode
4607 ///
4608 /// 2. \code
4609 /// csinc w9, wzr, wzr, <condition code>
4610 /// tbz w9, #0, 0x44
4611 /// \endcode
4612 /// to
4613 /// \code
4614 /// b.<condition code>
4615 /// \endcode
4616 ///
4617 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
4618 /// compare's constant operand is power of 2.
4619 ///
4620 /// Examples:
4621 /// \code
4622 /// and w8, w8, #0x400
4623 /// cbnz w8, L1
4624 /// \endcode
4625 /// to
4626 /// \code
4627 /// tbnz w8, #10, L1
4628 /// \endcode
4629 ///
4630 /// \param MI Conditional Branch
4631 /// \return True when the simple conditional branch is generated
4632 ///
4633 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
4634   bool IsNegativeBranch = false;
4635  bool IsTestAndBranch = false;
4636  unsigned TargetBBInMI = 0;
4637  switch (MI.getOpcode()) {
4638  default:
4639  llvm_unreachable("Unknown branch instruction?");
4640  case AArch64::Bcc:
4641  return false;
4642  case AArch64::CBZW:
4643  case AArch64::CBZX:
4644  TargetBBInMI = 1;
4645  break;
4646  case AArch64::CBNZW:
4647  case AArch64::CBNZX:
4648  TargetBBInMI = 1;
4649  IsNegativeBranch = true;
4650  break;
4651  case AArch64::TBZW:
4652  case AArch64::TBZX:
4653  TargetBBInMI = 2;
4654  IsTestAndBranch = true;
4655  break;
4656  case AArch64::TBNZW:
4657  case AArch64::TBNZX:
4658  TargetBBInMI = 2;
4659  IsNegativeBranch = true;
4660  IsTestAndBranch = true;
4661  break;
4662  }
4663  // So we increment a zero register and test for bits other
4664  // than bit 0? Conservatively bail out in case the verifier
4665  // missed this case.
4666  if (IsTestAndBranch && MI.getOperand(1).getImm())
4667  return false;
4668 
4669  // Find Definition.
4670   assert(MI.getParent() && "Incomplete machine instruction\n");
4671  MachineBasicBlock *MBB = MI.getParent();
4672  MachineFunction *MF = MBB->getParent();
4673  MachineRegisterInfo *MRI = &MF->getRegInfo();
4674  unsigned VReg = MI.getOperand(0).getReg();
4675   if (!TargetRegisterInfo::isVirtualRegister(VReg))
4676     return false;
4677 
4678  MachineInstr *DefMI = MRI->getVRegDef(VReg);
4679 
4680  // Look through COPY instructions to find definition.
4681  while (DefMI->isCopy()) {
4682  unsigned CopyVReg = DefMI->getOperand(1).getReg();
4683  if (!MRI->hasOneNonDBGUse(CopyVReg))
4684  return false;
4685  if (!MRI->hasOneDef(CopyVReg))
4686  return false;
4687  DefMI = MRI->getVRegDef(CopyVReg);
4688  }
4689 
4690  switch (DefMI->getOpcode()) {
4691  default:
4692  return false;
4693  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
4694  case AArch64::ANDWri:
4695  case AArch64::ANDXri: {
4696  if (IsTestAndBranch)
4697  return false;
4698  if (DefMI->getParent() != MBB)
4699  return false;
4700  if (!MRI->hasOneNonDBGUse(VReg))
4701  return false;
4702 
4703  bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
4704     uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
4705         DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
4706  if (!isPowerOf2_64(Mask))
4707  return false;
4708 
4709  MachineOperand &MO = DefMI->getOperand(1);
4710  unsigned NewReg = MO.getReg();
4711     if (!TargetRegisterInfo::isVirtualRegister(NewReg))
4712       return false;
4713 
4714  assert(!MRI->def_empty(NewReg) && "Register must be defined.");
4715 
4716  MachineBasicBlock &RefToMBB = *MBB;
4717  MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
4718  DebugLoc DL = MI.getDebugLoc();
4719  unsigned Imm = Log2_64(Mask);
4720  unsigned Opc = (Imm < 32)
4721  ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
4722  : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
4723  MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
4724  .addReg(NewReg)
4725  .addImm(Imm)
4726  .addMBB(TBB);
4727  // Register lives on to the CBZ now.
4728  MO.setIsKill(false);
4729 
4730     // For immediates smaller than 32, we need to use the 32-bit
4731     // variant (W) in all cases, because the 64-bit variant cannot
4732     // encode them.
4733  // Therefore, if the input register is 64-bit, we need to take the
4734  // 32-bit sub-part.
4735  if (!Is32Bit && Imm < 32)
4736  NewMI->getOperand(0).setSubReg(AArch64::sub_32);
4737  MI.eraseFromParent();
4738  return true;
4739  }
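  // Worked example (illustrative registers): the pair
  //   %1:gpr64 = ANDXri %0, <encoding of 0x10>
  //   CBNZX %1, %bb.1
  // has a power-of-two mask, so it is rewritten to
  //   TBNZW %0.sub_32, 4, %bb.1
  // using the W form and the sub_32 sub-register because bit 4 is below 32.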
4740  // Look for CSINC
4741  case AArch64::CSINCWr:
4742  case AArch64::CSINCXr: {
4743  if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
4744  DefMI->getOperand(2).getReg() == AArch64::WZR) &&
4745  !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
4746  DefMI->getOperand(2).getReg() == AArch64::XZR))
4747  return false;
4748 
4749  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
4750  return false;
4751 
4752     AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
4753     // Convert only when the condition code is not modified between
4754  // the CSINC and the branch. The CC may be used by other
4755  // instructions in between.
4756     if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
4757       return false;
4758  MachineBasicBlock &RefToMBB = *MBB;
4759  MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
4760  DebugLoc DL = MI.getDebugLoc();
4761  if (IsNegativeBranch)
4762       CC = AArch64CC::getInvertedCondCode(CC);
4763     BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
4764  MI.eraseFromParent();
4765  return true;
4766  }
4767  }
4768 }
4769 
4770 std::pair<unsigned, unsigned>
4771 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4772   const unsigned Mask = AArch64II::MO_FRAGMENT;
4773  return std::make_pair(TF & Mask, TF & ~Mask);
4774 }
4775 
4776 ArrayRef<std::pair<unsigned, const char *>>
4777 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4778   using namespace AArch64II;
4779 
4780  static const std::pair<unsigned, const char *> TargetFlags[] = {
4781  {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
4782  {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
4783  {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
4784  {MO_HI12, "aarch64-hi12"}};
4785  return makeArrayRef(TargetFlags);
4786 }
4787 
4788 ArrayRef<std::pair<unsigned, const char *>>
4789 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
4790   using namespace AArch64II;
4791 
4792  static const std::pair<unsigned, const char *> TargetFlags[] = {
4793  {MO_COFFSTUB, "aarch64-coffstub"},
4794  {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"},
4795  {MO_S, "aarch64-s"}, {MO_TLS, "aarch64-tls"},
4796  {MO_DLLIMPORT, "aarch64-dllimport"}};
4797  return makeArrayRef(TargetFlags);
4798 }
4799 
4800 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
4801 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
4802   static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
4803  {{MOSuppressPair, "aarch64-suppress-pair"},
4804  {MOStridedAccess, "aarch64-strided-access"}};
4805  return makeArrayRef(TargetFlags);
4806 }
4807 
4808 /// Constants defining how certain sequences should be outlined.
4809 /// This encompasses how an outlined function should be called, and what kind of
4810 /// frame should be emitted for that outlined function.
4811 ///
4812 /// \p MachineOutlinerDefault implies that the function should be called with
4813 /// a save and restore of LR to the stack.
4814 ///
4815 /// That is,
4816 ///
4817 /// I1 Save LR OUTLINED_FUNCTION:
4818 /// I2 --> BL OUTLINED_FUNCTION I1
4819 /// I3 Restore LR I2
4820 /// I3
4821 /// RET
4822 ///
4823 /// * Call construction overhead: 3 (save + BL + restore)
4824 /// * Frame construction overhead: 1 (ret)
4825 /// * Requires stack fixups? Yes
4826 ///
4827 /// \p MachineOutlinerTailCall implies that the function is being created from
4828 /// a sequence of instructions ending in a return.
4829 ///
4830 /// That is,
4831 ///
4832 /// I1 OUTLINED_FUNCTION:
4833 /// I2 --> B OUTLINED_FUNCTION I1
4834 /// RET I2
4835 /// RET
4836 ///
4837 /// * Call construction overhead: 1 (B)
4838 /// * Frame construction overhead: 0 (Return included in sequence)
4839 /// * Requires stack fixups? No
4840 ///
4841 /// \p MachineOutlinerNoLRSave implies that the function should be called using
4842 /// a BL instruction, but doesn't require LR to be saved and restored. This
4843 /// happens when LR is known to be dead.
4844 ///
4845 /// That is,
4846 ///
4847 /// I1 OUTLINED_FUNCTION:
4848 /// I2 --> BL OUTLINED_FUNCTION I1
4849 /// I3 I2
4850 /// I3
4851 /// RET
4852 ///
4853 /// * Call construction overhead: 1 (BL)
4854 /// * Frame construction overhead: 1 (RET)
4855 /// * Requires stack fixups? No
4856 ///
4857 /// \p MachineOutlinerThunk implies that the function is being created from
4858 /// a sequence of instructions ending in a call. The outlined function is
4859 /// called with a BL instruction, and the outlined function tail-calls the
4860 /// original call destination.
4861 ///
4862 /// That is,
4863 ///
4864 /// I1 OUTLINED_FUNCTION:
4865 /// I2 --> BL OUTLINED_FUNCTION I1
4866 /// BL f I2
4867 /// B f
4868 /// * Call construction overhead: 1 (BL)
4869 /// * Frame construction overhead: 0
4870 /// * Requires stack fixups? No
4871 ///
4872 /// \p MachineOutlinerRegSave implies that the function should be called with a
4873 /// save and restore of LR to an available register. This allows us to avoid
4874 /// stack fixups. Note that this outlining variant is compatible with the
4875 /// NoLRSave case.
4876 ///
4877 /// That is,
4878 ///
4879 /// I1 Save LR OUTLINED_FUNCTION:
4880 /// I2 --> BL OUTLINED_FUNCTION I1
4881 /// I3 Restore LR I2
4882 /// I3
4883 /// RET
4884 ///
4885 /// * Call construction overhead: 3 (save + BL + restore)
4886 /// * Frame construction overhead: 1 (ret)
4887 /// * Requires stack fixups? No
4888 enum MachineOutlinerClass {
4889   MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
4890  MachineOutlinerTailCall, /// Only emit a branch.
4891  MachineOutlinerNoLRSave, /// Emit a call and return.
4892  MachineOutlinerThunk, /// Emit a call and tail-call.
4893  MachineOutlinerRegSave /// Same as default, but save to a register.
4894 };
4895 
4896 enum MachineOutlinerMBBFlags {
4897   LRUnavailableSomewhere = 0x2,
4898   HasCalls = 0x4,
4899   UnsafeRegsDead = 0x8
4900 };
4901 
4902 unsigned
4903 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
4904  assert(C.LRUWasSet && "LRU wasn't set?");
4905  MachineFunction *MF = C.getMF();
4906  const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
4907  MF->getSubtarget().getRegisterInfo());
4908 
4909  // Check if there is an available register across the sequence that we can
4910  // use.
4911  for (unsigned Reg : AArch64::GPR64RegClass) {
4912  if (!ARI->isReservedReg(*MF, Reg) &&
4913  Reg != AArch64::LR && // LR is not reserved, but don't use it.
4914  Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
4915  Reg != AArch64::X17 && // Ditto for X17.
4917  return Reg;
4918  }
4919 
4920  // No suitable register. Return 0.
4921  return 0u;
4922 }
4923 
4924 outliner::OutlinedFunction
4925 AArch64InstrInfo::getOutliningCandidateInfo(
4926     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
4927  outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
4928  unsigned SequenceSize =
4929  std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
4930  [this](unsigned Sum, const MachineInstr &MI) {
4931  return Sum + getInstSizeInBytes(MI);
4932  });
4933 
4934  // Properties about candidate MBBs that hold for all of them.
4935  unsigned FlagsSetInAll = 0xF;
4936 
4937  // Compute liveness information for each candidate, and set FlagsSetInAll.
4938   const TargetRegisterInfo &TRI = getRegisterInfo();
4939   std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
4940  [&FlagsSetInAll](outliner::Candidate &C) {
4941  FlagsSetInAll &= C.Flags;
4942  });
4943 
4944  // According to the AArch64 Procedure Call Standard, the following are
4945  // undefined on entry/exit from a function call:
4946  //
4947  // * Registers x16, x17, (and thus w16, w17)
4948  // * Condition codes (and thus the NZCV register)
4949  //
4950   // Because of this, we can't outline any sequence of instructions where
4951   // one of these registers is live into/across it. Thus, we need to delete
4952   // those candidates.
4955  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
4956  // If the unsafe registers in this block are all dead, then we don't need
4957  // to compute liveness here.
4958  if (C.Flags & UnsafeRegsDead)
4959  return false;
4960  C.initLRU(TRI);
4961  LiveRegUnits LRU = C.LRU;
4962  return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
4963  !LRU.available(AArch64::NZCV));
4964  };
4965 
4966  // Are there any candidates where those registers are live?
4967  if (!(FlagsSetInAll & UnsafeRegsDead)) {
4968  // Erase every candidate that violates the restrictions above. (It could be
4969  // true that we have viable candidates, so it's not worth bailing out in
4970     // the case that, say, 1 out of 20 candidates violate the restrictions.)
4971  RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
4972  RepeatedSequenceLocs.end(),
4973  CantGuaranteeValueAcrossCall),
4974  RepeatedSequenceLocs.end());
4975 
4976  // If the sequence doesn't have enough candidates left, then we're done.
4977  if (RepeatedSequenceLocs.size() < 2)
4978  return outliner::OutlinedFunction();
4979  }
4980 
4981  // At this point, we have only "safe" candidates to outline. Figure out
4982  // frame + call instruction information.
4983 
4984  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
4985 
4986  // Helper lambda which sets call information for every candidate.
4987  auto SetCandidateCallInfo =
4988  [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
4989  for (outliner::Candidate &C : RepeatedSequenceLocs)
4990  C.setCallInfo(CallID, NumBytesForCall);
4991  };
4992 
4993  unsigned FrameID = MachineOutlinerDefault;
4994  unsigned NumBytesToCreateFrame = 4;
4995 
4996  bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
4997  return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
4998  });
4999 
5000   // Returns true if an instruction is safe to fix up, false otherwise.
5001  auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
5002  if (MI.isCall())
5003  return true;
5004 
5005  if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
5006  !MI.readsRegister(AArch64::SP, &TRI))
5007  return true;
5008 
5009  // Any modification of SP will break our code to save/restore LR.
5010  // FIXME: We could handle some instructions which add a constant
5011  // offset to SP, with a bit more work.
5012  if (MI.modifiesRegister(AArch64::SP, &TRI))
5013  return false;
5014 
5015  // At this point, we have a stack instruction that we might need to
5016  // fix up. We'll handle it if it's a load or store.
5017  if (MI.mayLoadOrStore()) {
5018  MachineOperand *Base; // Filled with the base operand of MI.
5019  int64_t Offset; // Filled with the offset of MI.
5020 
5021  // Does it allow us to offset the base operand and is the base the
5022  // register SP?
5023  if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() ||
5024  Base->getReg() != AArch64::SP)
5025  return false;
5026 
5027  // Find the minimum/maximum offset for this instruction and check
5028  // if fixing it up would be in range.
5029  int64_t MinOffset,
5030  MaxOffset; // Unscaled offsets for the instruction.
5031  unsigned Scale; // The scale to multiply the offsets by.
5032  unsigned DummyWidth;
5033  getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
5034 
5035  Offset += 16; // Update the offset to what it would be if we outlined.
5036  if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
5037  return false;
5038 
5039  // It's in range, so we can outline it.
5040  return true;
5041  }
5042 
5043  // FIXME: Add handling for instructions like "add x0, sp, #8".
5044 
5045  // We can't fix it up, so don't outline it.
5046  return false;
5047  };
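  // Worked example of the fixup check above (illustrative): an outlined
  //   LDRXui %x0, $sp, 1            ; byte offset 8, Scale = 8
  // would address [sp, #24] once the 16-byte LR spill is in place, so the
  // instruction is only considered safe if 24 still lies within the opcode's
  // scaled immediate range reported by getMemOpInfo.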
5048 
5049  // True if it's possible to fix up each stack instruction in this sequence.
5050  // Important for frames/call variants that modify the stack.
5051  bool AllStackInstrsSafe = std::all_of(
5052  FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
5053 
5054  // If the last instruction in any candidate is a terminator, then we should
5055  // tail call all of the candidates.
5056  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
5057  FrameID = MachineOutlinerTailCall;
5058  NumBytesToCreateFrame = 0;
5059  SetCandidateCallInfo(MachineOutlinerTailCall, 4);
5060  }
5061 
5062  else if (LastInstrOpcode == AArch64::BL ||
5063  (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
5064  // FIXME: Do we need to check if the code after this uses the value of LR?
5065  FrameID = MachineOutlinerThunk;
5066  NumBytesToCreateFrame = 0;
5067  SetCandidateCallInfo(MachineOutlinerThunk, 4);
5068  }
5069 
5070  else {
5071  // We need to decide how to emit calls + frames. We can always emit the same
5072  // frame if we don't need to save to the stack. If we have to save to the
5073  // stack, then we need a different frame.
5074  unsigned NumBytesNoStackCalls = 0;
5075  std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
5076 
5077  for (outliner::Candidate &C : RepeatedSequenceLocs) {
5078  C.initLRU(TRI);
5079 
5080  // Is LR available? If so, we don't need a save.
5081  if (C.LRU.available(AArch64::LR)) {
5082  NumBytesNoStackCalls += 4;
5083         C.setCallInfo(MachineOutlinerNoLRSave, 4);
5084         CandidatesWithoutStackFixups.push_back(C);
5085  }
5086 
5087  // Is an unused register available? If so, we won't modify the stack, so
5088  // we can outline with the same frame type as those that don't save LR.
5089  else if (findRegisterToSaveLRTo(C)) {
5090  NumBytesNoStackCalls += 12;
5091         C.setCallInfo(MachineOutlinerRegSave, 12);
5092         CandidatesWithoutStackFixups.push_back(C);
5093  }
5094 
5095  // Is SP used in the sequence at all? If not, we don't have to modify
5096  // the stack, so we are guaranteed to get the same frame.
5097  else if (C.UsedInSequence.available(AArch64::SP)) {
5098  NumBytesNoStackCalls += 12;
5099         C.setCallInfo(MachineOutlinerDefault, 12);
5100         CandidatesWithoutStackFixups.push_back(C);
5101  }
5102 
5103  // If we outline this, we need to modify the stack. Pretend we don't
5104  // outline this by saving all of its bytes.
5105  else {
5106  NumBytesNoStackCalls += SequenceSize;
5107  }
5108  }
5109 
5110  // If there are no places where we have to save LR, then note that we
5111  // don't have to update the stack. Otherwise, give every candidate the
5112  // default call type, as long as it's safe to do so.
5113  if (!AllStackInstrsSafe ||
5114  NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
5115  RepeatedSequenceLocs = CandidatesWithoutStackFixups;
5116  FrameID = MachineOutlinerNoLRSave;
5117  } else {
5118  SetCandidateCallInfo(MachineOutlinerDefault, 12);
5119  }
5120 
5121  // If we dropped all of the candidates, bail out here.
5122  if (RepeatedSequenceLocs.size() < 2) {
5123  RepeatedSequenceLocs.clear();
5124  return outliner::OutlinedFunction();
5125  }
5126  }
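  // For example, with three candidates the default call sequence (save LR to
  // the stack, BL, restore) costs 3 * 12 bytes of call overhead; if the
  // variants that avoid stack fixups cost no more than that in total, they
  // win and the cheaper MachineOutlinerNoLRSave frame is used instead.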
5127 
5128  // Does every candidate's MBB contain a call? If so, then we might have a call
5129  // in the range.
5130  if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
5131  // Check if the range contains a call. These require a save + restore of the
5132  // link register.
5133  bool ModStackToSaveLR = false;
5134  if (std::any_of(FirstCand.front(), FirstCand.back(),
5135  [](const MachineInstr &MI) { return MI.isCall(); }))
5136  ModStackToSaveLR = true;
5137 
5138  // Handle the last instruction separately. If this is a tail call, then the
5139  // last instruction is a call. We don't want to save + restore in this case.
5140  // However, it could be possible that the last instruction is a call without
5141  // it being valid to tail call this sequence. We should consider this as
5142  // well.
5143  else if (FrameID != MachineOutlinerThunk &&
5144  FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
5145  ModStackToSaveLR = true;
5146 
5147  if (ModStackToSaveLR) {
5148  // We can't fix up the stack. Bail out.
5149  if (!AllStackInstrsSafe) {
5150  RepeatedSequenceLocs.clear();
5151  return outliner::OutlinedFunction();
5152  }
5153 
5154  // Save + restore LR.
5155  NumBytesToCreateFrame += 8;
5156  }
5157  }
5158 
5159  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
5160  NumBytesToCreateFrame, FrameID);
5161 }
5162 
5163 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
5164     MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
5165  const Function &F = MF.getFunction();
5166 
5167  // Can F be deduplicated by the linker? If it can, don't outline from it.
5168  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
5169  return false;
5170 
5171  // Don't outline from functions with section markings; the program could
5172  // expect that all the code is in the named section.
5173  // FIXME: Allow outlining from multiple functions with the same section
5174  // marking.
5175  if (F.hasSection())
5176  return false;
5177 
5178  // Outlining from functions with redzones is unsafe since the outliner may
5179  // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
5180  // outline from it.
5181   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
5182   if (!AFI || AFI->hasRedZone().getValueOr(true))
5183  return false;
5184 
5185  // It's safe to outline from MF.
5186  return true;
5187 }
5188 
5189 bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
5190                                               unsigned &Flags) const {
5191  // Check if LR is available through all of the MBB. If it's not, then set
5192  // a flag.
5194  "Suitable Machine Function for outlining must track liveness");
5195   LiveRegUnits LRU(getRegisterInfo());
5196 
5197  std::for_each(MBB.rbegin(), MBB.rend(),
5198  [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
5199 
5200  // Check if each of the unsafe registers are available...
5201  bool W16AvailableInBlock = LRU.available(AArch64::W16);
5202  bool W17AvailableInBlock = LRU.available(AArch64::W17);
5203  bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
5204 
5205  // If all of these are dead (and not live out), we know we don't have to check
5206  // them later.
5207  if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
5208     Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
5209 
5210  // Now, add the live outs to the set.
5211  LRU.addLiveOuts(MBB);
5212 
5213  // If any of these registers is available in the MBB, but also a live out of
5214  // the block, then we know outlining is unsafe.
5215  if (W16AvailableInBlock && !LRU.available(AArch64::W16))
5216  return false;
5217  if (W17AvailableInBlock && !LRU.available(AArch64::W17))
5218  return false;
5219  if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
5220  return false;
5221 
5222  // Check if there's a call inside this MachineBasicBlock. If there is, then
5223  // set a flag.
5224  if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
5225     Flags |= MachineOutlinerMBBFlags::HasCalls;
5226 
5227  MachineFunction *MF = MBB.getParent();
5228 
5229  // In the event that we outline, we may have to save LR. If there is an
5230  // available register in the MBB, then we'll always save LR there. Check if
5231  // this is true.
5232  bool CanSaveLR = false;
5233  const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
5234  MF->getSubtarget().getRegisterInfo());
5235 
5236  // Check if there is an available register across the sequence that we can
5237  // use.
5238  for (unsigned Reg : AArch64::GPR64RegClass) {
5239  if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
5240  Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
5241  CanSaveLR = true;
5242  break;
5243  }
5244  }
5245 
5246  // Check if we have a register we can save LR to, and if LR was used
5247  // somewhere. If both of those things are true, then we need to evaluate the
5248  // safety of outlining stack instructions later.
5249  if (!CanSaveLR && !LRU.available(AArch64::LR))
5250     Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
5251 
5252  return true;
5253 }
5254 
5255 outliner::InstrType
5256 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
5257                                    unsigned Flags) const {
5258  MachineInstr &MI = *MIT;
5259  MachineBasicBlock *MBB = MI.getParent();
5260  MachineFunction *MF = MBB->getParent();
5261  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
5262 
5263  // Don't outline LOHs.
5264  if (FuncInfo->getLOHRelated().count(&MI))
5265     return outliner::InstrType::Illegal;
5266 
5267  // Don't allow debug values to impact outlining type.
5268  if (MI.isDebugInstr() || MI.isIndirectDebugValue())
5269     return outliner::InstrType::Invisible;
5270 
5271  // At this point, KILL instructions don't really tell us much so we can go
5272  // ahead and skip over them.
5273  if (MI.isKill())
5274     return outliner::InstrType::Invisible;
5275 
5276  // Is this a terminator for a basic block?
5277  if (MI.isTerminator()) {
5278 
5279  // Is this the end of a function?
5280  if (MI.getParent()->succ_empty())
5281       return outliner::InstrType::Legal;
5282 
5283  // It's not, so don't outline it.
5284     return outliner::InstrType::Illegal;
5285   }
5286 
5287  // Make sure none of the operands are un-outlinable.
5288  for (const MachineOperand &MOP : MI.operands()) {
5289  if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
5290  MOP.isTargetIndex())
5291       return outliner::InstrType::Illegal;
5292 
5293  // If it uses LR or W30 explicitly, then don't touch it.
5294  if (MOP.isReg() && !MOP.isImplicit() &&
5295  (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
5296       return outliner::InstrType::Illegal;
5297   }
5298 
5299  // Special cases for instructions that can always be outlined, but will fail
5300  // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always
5301  // be outlined because they don't require a *specific* value to be in LR.
5302  if (MI.getOpcode() == AArch64::ADRP)
5303     return outliner::InstrType::Legal;
5304 
5305  // If MI is a call we might be able to outline it. We don't want to outline
5306  // any calls that rely on the position of items on the stack. When we outline
5307  // something containing a call, we have to emit a save and restore of LR in
5308  // the outlined function. Currently, this always happens by saving LR to the
5309  // stack. Thus, if we outline, say, half the parameters for a function call
5310  // plus the call, then we'll break the callee's expectations for the layout
5311  // of the stack.
5312  //
5313  // FIXME: Allow calls to functions which construct a stack frame, as long
5314  // as they don't access arguments on the stack.
5315  // FIXME: Figure out some way to analyze functions defined in other modules.
5316  // We should be able to compute the memory usage based on the IR calling
5317  // convention, even if we can't see the definition.
5318  if (MI.isCall()) {
5319  // Get the function associated with the call. Look at each operand and find
5320  // the one that represents the callee and get its name.
5321  const Function *Callee = nullptr;
5322  for (const MachineOperand &MOP : MI.operands()) {
5323  if (MOP.isGlobal()) {
5324  Callee = dyn_cast<Function>(MOP.getGlobal());
5325  break;
5326  }
5327  }
5328 
5329  // Never outline calls to mcount. There isn't any rule that would require
5330  // this, but the Linux kernel's "ftrace" feature depends on it.
5331  if (Callee && Callee->getName() == "\01_mcount")
5332       return outliner::InstrType::Illegal;
5333 
5334  // If we don't know anything about the callee, assume it depends on the
5335  // stack layout of the caller. In that case, it's only legal to outline
5336  // as a tail-call. Whitelist the call instructions we know about so we
5337  // don't get unexpected results with call pseudo-instructions.
5338  auto UnknownCallOutlineType = outliner::InstrType::Illegal;
5339  if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
5340  UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
5341 
5342  if (!Callee)
5343  return UnknownCallOutlineType;
5344 
5345     // We have a function we have information about. Check if it's something
5346     // we can safely outline.
5347  MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
5348 
5349  // We don't know what's going on with the callee at all. Don't touch it.
5350  if (!CalleeMF)
5351  return UnknownCallOutlineType;
5352 
5353  // Check if we know anything about the callee saves on the function. If we
5354  // don't, then don't touch it, since that implies that we haven't
5355  // computed anything about its stack frame yet.
5356  MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
5357  if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
5358  MFI.getNumObjects() > 0)
5359  return UnknownCallOutlineType;
5360 
5361  // At this point, we can say that CalleeMF ought to not pass anything on the
5362  // stack. Therefore, we can outline it.
5363     return outliner::InstrType::Legal;
5364   }
5365 
5366  // Don't outline positions.
5367  if (MI.isPosition())
5368     return outliner::InstrType::Illegal;
5369 
5370  // Don't touch the link register or W30.
5371  if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
5372  MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
5373     return outliner::InstrType::Illegal;
5374 
5375   return outliner::InstrType::Legal;
5376 }
5377 
5378 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
5379  for (MachineInstr &MI : MBB) {
5381  unsigned Width;
5382  int64_t Offset;
5383 
5384  // Is this a load or store with an immediate offset with SP as the base?
5385  if (!MI.mayLoadOrStore() ||
5386  !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) ||
5387  (Base->isReg() && Base->getReg() != AArch64::SP))
5388  continue;
5389 
5390  // It is, so we have to fix it up.
5391  unsigned Scale;
5392  int64_t Dummy1, Dummy2;
5393 
5394  MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
5395  assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
5396  getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
5397  assert(Scale != 0 && "Unexpected opcode!");
5398 
5399  // We've pushed the return address to the stack, so add 16 to the offset.
5400  // This is safe, since we already checked if it would overflow when we
5401  // checked if this instruction was legal to outline.
5402  int64_t NewImm = (Offset + 16) / Scale;
5403  StackOffsetOperand.setImm(NewImm);
5404  }
5405 }
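// Worked example (illustrative): a spill rewritten by the loop above, such as
//   STRXui %x19, $sp, 2            ; byte offset 16, Scale = 8
// gets NewImm = (16 + 16) / 8 = 4, i.e. [sp, #32], accounting for the link
// register that the outlined function pushed below the original frame.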
5406 
5407 void AArch64InstrInfo::buildOutlinedFrame(
5408     MachineBasicBlock &MBB, MachineFunction &MF,
5409     const outliner::OutlinedFunction &OF) const {
5410  // For thunk outlining, rewrite the last instruction from a call to a
5411  // tail-call.
5412   if (OF.FrameConstructionID == MachineOutlinerThunk) {
5413     MachineInstr *Call = &*--MBB.instr_end();
5414  unsigned TailOpcode;
5415  if (Call->getOpcode() == AArch64::BL) {
5416  TailOpcode = AArch64::TCRETURNdi;
5417  } else {
5418  assert(Call->getOpcode() == AArch64::BLR);
5419  TailOpcode = AArch64::TCRETURNriALL;
5420  }
5421  MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
5422  .add(Call->getOperand(0))
5423  .addImm(0);
5424  MBB.insert(MBB.end(), TC);
5425  Call->eraseFromParent();
5426  }
5427 
5428  // Is there a call in the outlined range?
5429  auto IsNonTailCall = [](MachineInstr &MI) {
5430  return MI.isCall() && !MI.isReturn();
5431  };
5432  if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
5433  // Fix up the instructions in the range, since we're going to modify the
5434  // stack.
5436  "Can only fix up stack references once");
5437  fixupPostOutline(MBB);
5438 
5439  // LR has to be a live in so that we can save it.
5440  MBB.addLiveIn(AArch64::LR);
5441 
5442     MachineBasicBlock::iterator It = MBB.begin();
5443     MachineBasicBlock::iterator Et = MBB.end();
5444 
5445     if (OF.FrameConstructionID == MachineOutlinerTailCall ||
5446         OF.FrameConstructionID == MachineOutlinerThunk)
5447       Et = std::prev(MBB.end());
5448 
5449  // Insert a save before the outlined region
5450  MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5451  .addReg(AArch64::SP, RegState::Define)
5452  .addReg(AArch64::LR)
5453  .addReg(AArch64::SP)
5454  .addImm(-16);
5455  It = MBB.insert(It, STRXpre);
5456 
5457  const TargetSubtargetInfo &STI = MF.getSubtarget();
5458  const MCRegisterInfo *MRI = STI.getRegisterInfo();
5459  unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
5460 
5461  // Add a CFI saying the stack was moved 16 B down.
5462  int64_t StackPosEntry =
5463  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
5464  BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5465  .addCFIIndex(StackPosEntry)
5466  .setMIFlags(MachineInstr::FrameSetup);
5467 
5468  // Add a CFI saying that the LR that we want to find is now 16 B higher than
5469  // before.
5470  int64_t LRPosEntry =
5471  MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
5472  BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5473  .addCFIIndex(LRPosEntry)
5474  .setMIFlags(MachineInstr::FrameSetup);
5475 
5476  // Insert a restore before the terminator for the function.
5477  MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5478  .addReg(AArch64::SP, RegState::Define)
5479  .addReg(AArch64::LR, RegState::Define)
5480  .addReg(AArch64::SP)
5481  .addImm(16);
5482  Et = MBB.insert(Et, LDRXpost);
5483  }
5484 
5485  // If this is a tail call outlined function, then there's already a return.
5486  if (OF.FrameConstructionID == MachineOutlinerTailCall ||
5487  OF.FrameConstructionID == MachineOutlinerThunk)
5488  return;
5489 
5490  // It's not a tail call, so we have to insert the return ourselves.
5491  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
5492  .addReg(AArch64::LR, RegState::Undef);
5493  MBB.insert(MBB.end(), ret);
5494 
5495  // Did we have to modify the stack by saving the link register?
5496  if (OF.FrameConstructionID != MachineOutlinerDefault)
5497  return;
5498 
5499  // We modified the stack.
5500  // Walk over the basic block and fix up all the stack accesses.
5501  fixupPostOutline(MBB);
5502 }
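// Rough sketch (assumed assembly spelling, not emitted verbatim here) of the
// frame built above for the default, LR-saving case:
//   str x30, [sp, #-16]!   ; STRXpre spill of LR, CFI records the 16-byte move
//   ...outlined body, SP-relative offsets fixed up by fixupPostOutline...
//   ldr x30, [sp], #16     ; LDRXpost reload of LR
//   ret                    ; RET inserted when the frame is not a tail call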
5503 
5504 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
5505  Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
5506  MachineFunction &MF, const outliner::Candidate &C) const {
5507 
5508  // Are we tail calling?
5509  if (C.CallConstructionID == MachineOutlinerTailCall) {
5510  // If yes, then we can just branch to the label.
5511  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
5512  .addGlobalAddress(M.getNamedValue(MF.getName()))
5513  .addImm(0));
5514  return It;
5515  }
5516 
5517  // Are we saving the link register?
5518  if (C.CallConstructionID == MachineOutlinerNoLRSave ||
5519  C.CallConstructionID == MachineOutlinerThunk) {
5520  // No, so just insert the call.
5521  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5522  .addGlobalAddress(M.getNamedValue(MF.getName())));
5523  return It;
5524  }
5525 
5526  // We want to return the spot where we inserted the call.
5527  MachineBasicBlock::iterator CallPt;
5528 
5529  // Instructions for saving and restoring LR around the call instruction we're
5530  // going to insert.
5531  MachineInstr *Save;
5532  MachineInstr *Restore;
5533  // Can we save to a register?
5534  if (C.CallConstructionID == MachineOutlinerRegSave) {
5535  // FIXME: This logic should be sunk into a target-specific interface so that
5536  // we don't have to recompute the register.
5537  unsigned Reg = findRegisterToSaveLRTo(C);
5538  assert(Reg != 0 && "No callee-saved register available?");
5539 
5540  // Save and restore LR from that register.
5541  Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
5542  .addReg(AArch64::XZR)
5543  .addReg(AArch64::LR)
5544  .addImm(0);
5545  Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
5546  .addReg(AArch64::XZR)
5547  .addReg(Reg)
5548  .addImm(0);
5549  } else {
5550  // We have the default case. Save and restore from SP.
5551  Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5552  .addReg(AArch64::SP, RegState::Define)
5553  .addReg(AArch64::LR)
5554  .addReg(AArch64::SP)
5555  .addImm(-16);
5556  Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5557  .addReg(AArch64::SP, RegState::Define)
5558  .addReg(AArch64::LR, RegState::Define)
5559  .addReg(AArch64::SP)
5560  .addImm(16);
5561  }
5562 
5563  It = MBB.insert(It, Save);
5564  It++;
5565 
5566  // Insert the call.
5567  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5568  .addGlobalAddress(M.getNamedValue(MF.getName())));
5569  CallPt = It;
5570  It++;
5571 
5572  It = MBB.insert(It, Restore);
5573  return CallPt;
5574 }
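// Illustrative shapes of the call sequences built above (assumed mnemonics;
// OUTLINED_FUNCTION_N is a placeholder name and xN stands for the register
// chosen by findRegisterToSaveLRTo):
//   TailCall:  b   OUTLINED_FUNCTION_N            ; TCRETURNdi
//   NoLRSave:  bl  OUTLINED_FUNCTION_N            ; plain BL
//   RegSave:   mov xN, x30                        ; ORRXrs copy of LR
//              bl  OUTLINED_FUNCTION_N
//              mov x30, xN
//   Default:   str x30, [sp, #-16]!               ; STRXpre
//              bl  OUTLINED_FUNCTION_N
//              ldr x30, [sp], #16                 ; LDRXpost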
5575 
5576 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
5577  MachineFunction &MF) const {
5578  return MF.getFunction().optForMinSize();
5579 }
5580 
5581 #define GET_INSTRINFO_HELPERS
5582 #include "AArch64GenInstrInfo.inc"