ARMFrameLowering.cpp
1 //===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the ARM implementation of TargetFrameLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMFrameLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMConstantPoolValue.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMSubtarget.h"
22 #include "Utils/ARMBaseInfo.h"
23 #include "llvm/ADT/BitVector.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/DebugLoc.h"
44 #include "llvm/IR/Function.h"
45 #include "llvm/MC/MCContext.h"
46 #include "llvm/MC/MCDwarf.h"
47 #include "llvm/MC/MCInstrDesc.h"
48 #include "llvm/MC/MCRegisterInfo.h"
49 #include "llvm/Support/CodeGen.h"
51 #include "llvm/Support/Compiler.h"
52 #include "llvm/Support/Debug.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstddef>
61 #include <cstdint>
62 #include <iterator>
63 #include <utility>
64 #include <vector>
65 
66 #define DEBUG_TYPE "arm-frame-lowering"
67 
68 using namespace llvm;
69 
70 static cl::opt<bool>
71 SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
72  cl::desc("Align ARM NEON spills in prolog and epilog"));
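// Being a hidden cl::opt, the flag can still be toggled for experiments; an
// illustrative invocation (file name hypothetical) is
//   llc -mtriple=armv7-none-eabi -align-neon-spills=false foo.ll
// to compare spill code with and without the aligned DPRCS2 path.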
73 
74 static MachineBasicBlock::iterator
75 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
76                         unsigned NumAlignedDPRCS2Regs);
77 
78 ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
79     : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
80       STI(sti) {}
81 
82 bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
83   // iOS always has a FP for backtracking, force other targets to keep their FP
84  // when doing FastISel. The emitted code is currently superior, and in cases
85  // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
86  return MF.getSubtarget<ARMSubtarget>().useFastISel();
87 }
88 
89 /// Returns true if the target can safely skip saving callee-saved registers
90 /// for noreturn nounwind functions.
91 bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
92   assert(MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
93          MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
94          !MF.getFunction().hasFnAttribute(Attribute::UWTable));
95 
96  // Frame pointer and link register are not treated as normal CSR, thus we
97  // can always skip CSR saves for nonreturning functions.
98  return true;
99 }
100 
101 /// hasFP - Return true if the specified function should have a dedicated frame
102 /// pointer register. This is true if the function has variable sized allocas
103 /// or if frame pointer elimination is disabled.
104 bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
105   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
106  const MachineFrameInfo &MFI = MF.getFrameInfo();
107 
108  // ABI-required frame pointer.
109   if (MF.getTarget().Options.DisableFramePointerElim(MF))
110     return true;
111 
112  // Frame pointer required for use within this function.
113  return (RegInfo->needsStackRealignment(MF) ||
114  MFI.hasVarSizedObjects() ||
115  MFI.isFrameAddressTaken());
116 }
117 
118 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
119 /// not required, we reserve argument space for call sites in the function
120 /// immediately on entry to the current function. This eliminates the need for
121 /// add/sub sp brackets around call sites. Returns true if the call frame is
122 /// included as part of the stack frame.
123 bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
124   const MachineFrameInfo &MFI = MF.getFrameInfo();
125  unsigned CFSize = MFI.getMaxCallFrameSize();
126   // It's not always a good idea to include the call frame as part of the
127   // stack frame. ARM (especially Thumb) has small immediate offsets for
128   // addressing the stack frame, so a large call frame can cause poor codegen
129   // and may even make it impossible to scavenge a register.
130  if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
131  return false;
132 
133  return !MFI.hasVarSizedObjects();
134 }
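// Worked example: the threshold above is ((1 << 12) - 1) / 2 = 2047 bytes, so
// a function whose largest call site needs, say, 3000 bytes of outgoing stack
// arguments keeps explicit sub/add sp pairs around its calls instead of
// folding the call frame into the fixed stack frame.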
135 
136 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
137 /// call frame pseudos can be simplified. Unlike most targets, having a FP
138 /// is not sufficient here since we still may reference some objects via SP
139 /// even when FP is available in Thumb2 mode.
140 bool
141 ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
142   return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
143 }
144 
145 static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII,
146                         const MCPhysReg *CSRegs) {
147  // Integer spill area is handled with "pop".
148  if (isPopOpcode(MI.getOpcode())) {
149  // The first two operands are predicates. The last two are
150  // imp-def and imp-use of SP. Check everything in between.
151  for (int i = 5, e = MI.getNumOperands(); i != e; ++i)
152  if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs))
153  return false;
154  return true;
155  }
156  if ((MI.getOpcode() == ARM::LDR_POST_IMM ||
157  MI.getOpcode() == ARM::LDR_POST_REG ||
158  MI.getOpcode() == ARM::t2LDR_POST) &&
159  isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs) &&
160  MI.getOperand(1).getReg() == ARM::SP)
161  return true;
162 
163  return false;
164 }
165 
166 static void emitRegPlusImmediate(
167     bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
168  const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
169  unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
170  ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
171  if (isARM)
172  emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
173  Pred, PredReg, TII, MIFlags);
174  else
175  emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
176  Pred, PredReg, TII, MIFlags);
177 }
178 
179 static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
180  MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
181  const ARMBaseInstrInfo &TII, int NumBytes,
182  unsigned MIFlags = MachineInstr::NoFlags,
183                          ARMCC::CondCodes Pred = ARMCC::AL,
184                          unsigned PredReg = 0) {
185  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
186  MIFlags, Pred, PredReg);
187 }
188 
189 static int sizeOfSPAdjustment(const MachineInstr &MI) {
190  int RegSize;
191  switch (MI.getOpcode()) {
192  case ARM::VSTMDDB_UPD:
193  RegSize = 8;
194  break;
195  case ARM::STMDB_UPD:
196  case ARM::t2STMDB_UPD:
197  RegSize = 4;
198  break;
199  case ARM::t2STR_PRE:
200  case ARM::STR_PRE_IMM:
201  return 4;
202  default:
203  llvm_unreachable("Unknown push or pop like instruction");
204  }
205 
206  int count = 0;
207  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
208  // pred) so the list starts at 4.
209  for (int i = MI.getNumOperands() - 1; i >= 4; --i)
210  count += RegSize;
211  return count;
212 }
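// Example: "vpush {d8, d9, d10, d11}" (VSTMDDB_UPD) carries the two explicit
// sp operands plus the predicate first, so the register list starts at
// operand 4 and this returns 4 * 8 = 32 bytes of SP adjustment.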
213 
214 static bool WindowsRequiresStackProbe(const MachineFunction &MF,
215                                       size_t StackSizeInBytes) {
216  const MachineFrameInfo &MFI = MF.getFrameInfo();
217  const Function &F = MF.getFunction();
218  unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
219  if (F.hasFnAttribute("stack-probe-size"))
220  F.getFnAttribute("stack-probe-size")
221         .getValueAsString()
222         .getAsInteger(0, StackProbeSize);
223  return (StackSizeInBytes >= StackProbeSize) &&
224  !F.hasFnAttribute("no-stack-arg-probe");
225 }
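// The probe size can be tuned per function from IR. A minimal sketch (the
// function itself is hypothetical; "stack-probe-size" is the attribute
// queried above):
//
//   define void @f() "stack-probe-size"="8192" { ... }
//
// With that attribute, frames smaller than 8192 bytes skip the probe call.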
226 
227 namespace {
228 
229 struct StackAdjustingInsts {
230   struct InstInfo {
231     MachineBasicBlock::iterator I;
232     unsigned SPAdjust;
233     bool BeforeFPSet;
234   };
235 
236   SmallVector<InstInfo, 4> Insts;
237 
238  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
239  bool BeforeFPSet = false) {
240  InstInfo Info = {I, SPAdjust, BeforeFPSet};
241  Insts.push_back(Info);
242  }
243 
244  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
245  auto Info =
246  llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
247  assert(Info != Insts.end() && "invalid sp adjusting instruction");
248  Info->SPAdjust += ExtraBytes;
249  }
250 
251  void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
252  const ARMBaseInstrInfo &TII, bool HasFP) {
253  MachineFunction &MF = *MBB.getParent();
254  unsigned CFAOffset = 0;
255  for (auto &Info : Insts) {
256  if (HasFP && !Info.BeforeFPSet)
257  return;
258 
259  CFAOffset -= Info.SPAdjust;
260  unsigned CFIIndex = MF.addFrameInst(
261  MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
262  BuildMI(MBB, std::next(Info.I), dl,
263  TII.get(TargetOpcode::CFI_INSTRUCTION))
264  .addCFIIndex(CFIIndex)
265           .setMIFlags(MachineInstr::FrameSetup);
266     }
267  }
268 };
269 
270 } // end anonymous namespace
271 
272 /// Emit an instruction sequence that will align the address in
273 /// register Reg by zero-ing out the lower bits. For versions of the
274 /// architecture that support Neon, this must be done in a single
275 /// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
276 /// single instruction. That function only gets called when optimizing
277 /// spilling of D registers on a core with the Neon instruction set
278 /// present.
279 static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
280                                      const TargetInstrInfo &TII,
281                                      MachineBasicBlock &MBB,
282                                      MachineBasicBlock::iterator MBBI,
283                                      const DebugLoc &DL, const unsigned Reg,
284  const unsigned Alignment,
285  const bool MustBeSingleInstruction) {
286  const ARMSubtarget &AST =
287  static_cast<const ARMSubtarget &>(MF.getSubtarget());
288  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
289  const unsigned AlignMask = Alignment - 1;
290  const unsigned NrBitsToZero = countTrailingZeros(Alignment);
291  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
292  if (!AFI->isThumbFunction()) {
293  // if the BFC instruction is available, use that to zero the lower
294  // bits:
295  // bfc Reg, #0, log2(Alignment)
296  // otherwise use BIC, if the mask to zero the required number of bits
297  // can be encoded in the bic immediate field
298  // bic Reg, Reg, Alignment-1
299  // otherwise, emit
300  // lsr Reg, Reg, log2(Alignment)
301  // lsl Reg, Reg, log2(Alignment)
302  if (CanUseBFC) {
303  BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
304  .addReg(Reg, RegState::Kill)
305  .addImm(~AlignMask)
306  .add(predOps(ARMCC::AL));
307  } else if (AlignMask <= 255) {
308  BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
309  .addReg(Reg, RegState::Kill)
310  .addImm(AlignMask)
311         .add(predOps(ARMCC::AL))
312         .add(condCodeOp());
313  } else {
314  assert(!MustBeSingleInstruction &&
315  "Shouldn't call emitAligningInstructions demanding a single "
316  "instruction to be emitted for large stack alignment for a target "
317  "without BFC.");
318  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
319  .addReg(Reg, RegState::Kill)
320  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
321         .add(predOps(ARMCC::AL))
322         .add(condCodeOp());
323  BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
324  .addReg(Reg, RegState::Kill)
325  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
326         .add(predOps(ARMCC::AL))
327         .add(condCodeOp());
328  }
329  } else {
330  // Since this is only reached for Thumb-2 targets, the BFC instruction
331  // should always be available.
332  assert(CanUseBFC);
333  BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
334  .addReg(Reg, RegState::Kill)
335  .addImm(~AlignMask)
336  .add(predOps(ARMCC::AL));
337  }
338 }
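// For example, with Reg = r4 and Alignment = 16 this emits roughly one of:
//   bfc r4, #0, #4                     ; ARM/Thumb2 when BFC is available
//   bic r4, r4, #15                    ; ARM without BFC, mask fits in imm
//   lsr r4, r4, #4 ; lsl r4, r4, #4    ; ARM fallback, two instructions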
339 
340 /// We need the offset of the frame pointer relative to other MachineFrameInfo
341 /// offsets which are encoded relative to SP at function begin.
342 /// See also emitPrologue() for how the FP is set up.
343 /// Unfortunately we cannot determine this value in determineCalleeSaves() yet
344 /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
345 /// this to produce a conservative estimate that we check in an assert() later.
346 static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) {
347  // This is a conservative estimation: Assume the frame pointer being r7 and
348  // pc("r15") up to r8 getting spilled before (= 8 registers).
349  return -AFI.getArgRegsSaveSize() - (8 * 4);
350 }
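// Example: with 8 bytes of vararg register saves the estimate is
// -8 - (8 * 4) = -40, i.e. the FP ends up at most 40 bytes below the incoming
// SP under this worst-case assumption.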
351 
352 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
353                                     MachineBasicBlock &MBB) const {
354  MachineBasicBlock::iterator MBBI = MBB.begin();
355  MachineFrameInfo &MFI = MF.getFrameInfo();
356   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
357   MachineModuleInfo &MMI = MF.getMMI();
358  MCContext &Context = MMI.getContext();
359  const TargetMachine &TM = MF.getTarget();
360  const MCRegisterInfo *MRI = Context.getRegisterInfo();
361  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
362  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
363  assert(!AFI->isThumb1OnlyFunction() &&
364  "This emitPrologue does not support Thumb1!");
365  bool isARM = !AFI->isThumbFunction();
366   unsigned Align = STI.getFrameLowering()->getStackAlignment();
367   unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
368  unsigned NumBytes = MFI.getStackSize();
369  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
370 
371  // Debug location must be unknown since the first debug location is used
372  // to determine the end of the prologue.
373  DebugLoc dl;
374 
375  unsigned FramePtr = RegInfo->getFrameRegister(MF);
376 
377  // Determine the sizes of each callee-save spill areas and record which frame
378  // belongs to which callee-save spill areas.
379  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
380  int FramePtrSpillFI = 0;
381  int D8SpillFI = 0;
382 
383  // All calls are tail calls in GHC calling conv, and functions have no
384  // prologue/epilogue.
385   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
386     return;
387 
388  StackAdjustingInsts DefCFAOffsetCandidates;
389  bool HasFP = hasFP(MF);
390 
391  // Allocate the vararg register save area.
392  if (ArgRegsSaveSize) {
393  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
394                  MachineInstr::FrameSetup);
395     DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true);
396  }
397 
398  if (!AFI->hasStackFrame() &&
399  (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
400  if (NumBytes - ArgRegsSaveSize != 0) {
401  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
402                    MachineInstr::FrameSetup);
403       DefCFAOffsetCandidates.addInst(std::prev(MBBI),
404  NumBytes - ArgRegsSaveSize, true);
405  }
406  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
407  return;
408  }
409 
410  // Determine spill area sizes.
411  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
412  unsigned Reg = CSI[i].getReg();
413  int FI = CSI[i].getFrameIdx();
414  switch (Reg) {
415  case ARM::R8:
416  case ARM::R9:
417  case ARM::R10:
418  case ARM::R11:
419  case ARM::R12:
420  if (STI.splitFramePushPop(MF)) {
421  GPRCS2Size += 4;
422  break;
423  }
424       LLVM_FALLTHROUGH;
425     case ARM::R0:
426  case ARM::R1:
427  case ARM::R2:
428  case ARM::R3:
429  case ARM::R4:
430  case ARM::R5:
431  case ARM::R6:
432  case ARM::R7:
433  case ARM::LR:
434  if (Reg == FramePtr)
435  FramePtrSpillFI = FI;
436  GPRCS1Size += 4;
437  break;
438  default:
439  // This is a DPR. Exclude the aligned DPRCS2 spills.
440  if (Reg == ARM::D8)
441  D8SpillFI = FI;
442  if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
443  DPRCSSize += 8;
444  }
445  }
446 
447  // Move past area 1.
448  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
449  if (GPRCS1Size > 0) {
450  GPRCS1Push = LastPush = MBBI++;
451  DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
452  }
453 
454  // Determine starting offsets of spill areas.
455  unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
456  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
457  unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
458  unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
459  unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
460  int FramePtrOffsetInPush = 0;
461  if (HasFP) {
462  int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
463  assert(getMaxFPOffset(MF.getFunction(), *AFI) <= FPOffset &&
464  "Max FP estimation is wrong");
465  FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize;
466  AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
467  NumBytes);
468  }
469  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
470  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
471  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
472 
473  // Move past area 2.
474  if (GPRCS2Size > 0) {
475  GPRCS2Push = LastPush = MBBI++;
476  DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
477  }
478 
479  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
480  // .cfi_offset operations will reflect that.
481  if (DPRGapSize) {
482  assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
483  if (LastPush != MBB.end() &&
484  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
485  DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
486  else {
487  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
488                    MachineInstr::FrameSetup);
489       DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
490  }
491  }
492 
493  // Move past area 3.
494  if (DPRCSSize > 0) {
495  // Since vpush register list cannot have gaps, there may be multiple vpush
496  // instructions in the prologue.
497  while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
498  DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
499  LastPush = MBBI++;
500  }
501  }
502 
503  // Move past the aligned DPRCS2 area.
504  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
505     MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
506     // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
507  // leaves the stack pointer pointing to the DPRCS2 area.
508  //
509  // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
510  NumBytes += MFI.getObjectOffset(D8SpillFI);
511  } else
512  NumBytes = DPRCSOffset;
513 
514  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
515  uint32_t NumWords = NumBytes >> 2;
516 
517  if (NumWords < 65536)
518  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
519  .addImm(NumWords)
520           .setMIFlags(MachineInstr::FrameSetup)
521           .add(predOps(ARMCC::AL));
522  else
523  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
524  .addImm(NumWords)
525           .setMIFlags(MachineInstr::FrameSetup);
526 
527  switch (TM.getCodeModel()) {
528  case CodeModel::Tiny:
529  llvm_unreachable("Tiny code model not available on ARM.");
530  case CodeModel::Small:
531  case CodeModel::Medium:
532  case CodeModel::Kernel:
533  BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
534           .add(predOps(ARMCC::AL))
535           .addExternalSymbol("__chkstk")
536  .addReg(ARM::R4, RegState::Implicit)
537  .setMIFlags(MachineInstr::FrameSetup);
538  break;
539  case CodeModel::Large:
540  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
541  .addExternalSymbol("__chkstk")
542           .setMIFlags(MachineInstr::FrameSetup);
543 
544       BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
545           .add(predOps(ARMCC::AL))
546           .addReg(ARM::R12, RegState::Kill)
547  .addReg(ARM::R4, RegState::Implicit)
548  .setMIFlags(MachineInstr::FrameSetup);
549  break;
550  }
551 
552  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
553  .addReg(ARM::SP, RegState::Kill)
554       .addReg(ARM::R4, RegState::Kill)
555       .setMIFlags(MachineInstr::FrameSetup)
556       .add(predOps(ARMCC::AL))
557       .add(condCodeOp());
558  NumBytes = 0;
559  }
560 
561  if (NumBytes) {
562  // Adjust SP after all the callee-save spills.
563  if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
564  tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
565  DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
566  else {
567  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
568                    MachineInstr::FrameSetup);
569       DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
570  }
571 
572  if (HasFP && isARM)
573  // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
574  // Note it's not safe to do this in Thumb2 mode because it would have
575  // taken two instructions:
576  // mov sp, r7
577  // sub sp, #24
578  // If an interrupt is taken between the two instructions, then sp is in
579  // an inconsistent state (pointing to the middle of callee-saved area).
580  // The interrupt handler can end up clobbering the registers.
581  AFI->setShouldRestoreSPFromFP(true);
582  }
583 
584  // Set FP to point to the stack slot that contains the previous FP.
585  // For iOS, FP is R7, which has now been stored in spill area 1.
586  // Otherwise, if this is not iOS, all the callee-saved registers go
587  // into spill area 1, including the FP in R11. In either case, it
588  // is in area one and the adjustment needs to take place just after
589  // that push.
590  if (HasFP) {
591  MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
592  unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
593  emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
594  dl, TII, FramePtr, ARM::SP,
595  PushSize + FramePtrOffsetInPush,
596                          MachineInstr::FrameSetup);
597     if (FramePtrOffsetInPush + PushSize != 0) {
598  unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
599  nullptr, MRI->getDwarfRegNum(FramePtr, true),
600  -(ArgRegsSaveSize - FramePtrOffsetInPush)));
601  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
602           .addCFIIndex(CFIIndex)
603           .setMIFlags(MachineInstr::FrameSetup);
604     } else {
605       unsigned CFIIndex =
606           MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
607               nullptr, MRI->getDwarfRegNum(FramePtr, true)));
608  BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
609  .addCFIIndex(CFIIndex)
610           .setMIFlags(MachineInstr::FrameSetup);
611     }
612  }
613 
614  // Now that the prologue's actual instructions are finalised, we can insert
615  // the necessary DWARF cf instructions to describe the situation. Start by
616  // recording where each register ended up:
617  if (GPRCS1Size > 0) {
618  MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
619  int CFIIndex;
620  for (const auto &Entry : CSI) {
621  unsigned Reg = Entry.getReg();
622  int FI = Entry.getFrameIdx();
623  switch (Reg) {
624  case ARM::R8:
625  case ARM::R9:
626  case ARM::R10:
627  case ARM::R11:
628  case ARM::R12:
629  if (STI.splitFramePushPop(MF))
630  break;
631         LLVM_FALLTHROUGH;
632       case ARM::R0:
633  case ARM::R1:
634  case ARM::R2:
635  case ARM::R3:
636  case ARM::R4:
637  case ARM::R5:
638  case ARM::R6:
639  case ARM::R7:
640  case ARM::LR:
641       CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
642           nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
643  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
644  .addCFIIndex(CFIIndex)
645           .setMIFlags(MachineInstr::FrameSetup);
646       break;
647  }
648  }
649  }
650 
651  if (GPRCS2Size > 0) {
652  MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
653  for (const auto &Entry : CSI) {
654  unsigned Reg = Entry.getReg();
655  int FI = Entry.getFrameIdx();
656  switch (Reg) {
657  case ARM::R8:
658  case ARM::R9:
659  case ARM::R10:
660  case ARM::R11:
661  case ARM::R12:
662  if (STI.splitFramePushPop(MF)) {
663  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
664  unsigned Offset = MFI.getObjectOffset(FI);
665  unsigned CFIIndex = MF.addFrameInst(
666  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
667  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
668  .addCFIIndex(CFIIndex)
669             .setMIFlags(MachineInstr::FrameSetup);
670       }
671  break;
672  }
673  }
674  }
675 
676  if (DPRCSSize > 0) {
677  // Since vpush register list cannot have gaps, there may be multiple vpush
678  // instructions in the prologue.
679  MachineBasicBlock::iterator Pos = std::next(LastPush);
680  for (const auto &Entry : CSI) {
681  unsigned Reg = Entry.getReg();
682  int FI = Entry.getFrameIdx();
683  if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
684  (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
685  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
686  unsigned Offset = MFI.getObjectOffset(FI);
687  unsigned CFIIndex = MF.addFrameInst(
688  MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
689  BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
690  .addCFIIndex(CFIIndex)
691           .setMIFlags(MachineInstr::FrameSetup);
692       }
693  }
694  }
695 
696  // Now we can emit descriptions of where the canonical frame address was
697  // throughout the process. If we have a frame pointer, it takes over the job
698  // half-way through, so only the first few .cfi_def_cfa_offset instructions
699  // actually get emitted.
700  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
701 
702  if (STI.isTargetELF() && hasFP(MF))
703     MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
704                             AFI->getFramePtrSpillOffset());
705 
706  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
707  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
708  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
709  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
710 
711  // If we need dynamic stack realignment, do it here. Be paranoid and make
712  // sure if we also have VLAs, we have a base pointer for frame access.
713  // If aligned NEON registers were spilled, the stack has already been
714  // realigned.
715  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
716  unsigned MaxAlign = MFI.getMaxAlignment();
717  assert(!AFI->isThumb1OnlyFunction());
718  if (!AFI->isThumbFunction()) {
719  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
720  false);
721  } else {
722  // We cannot use sp as source/dest register here, thus we're using r4 to
723  // perform the calculations. We're emitting the following sequence:
724  // mov r4, sp
725  // -- use emitAligningInstructions to produce best sequence to zero
726  // -- out lower bits in r4
727  // mov sp, r4
728  // FIXME: It will be better just to find spare register here.
729  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
730  .addReg(ARM::SP, RegState::Kill)
731  .add(predOps(ARMCC::AL));
732  emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
733  false);
734  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
735  .addReg(ARM::R4, RegState::Kill)
736  .add(predOps(ARMCC::AL));
737  }
738 
739  AFI->setShouldRestoreSPFromFP(true);
740  }
741 
742  // If we need a base pointer, set it up here. It's whatever the value
743  // of the stack pointer is at this point. Any variable size objects
744  // will be allocated after this, so we can still use the base pointer
745  // to reference locals.
746  // FIXME: Clarify FrameSetup flags here.
747  if (RegInfo->hasBasePointer(MF)) {
748  if (isARM)
749  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
750  .addReg(ARM::SP)
752  .add(condCodeOp());
753  else
754  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
755  .addReg(ARM::SP)
756  .add(predOps(ARMCC::AL));
757  }
758 
759  // If the frame has variable sized objects then the epilogue must restore
760  // the sp from fp. We can assume there's an FP here since hasFP already
761  // checks for hasVarSizedObjects.
762  if (MFI.hasVarSizedObjects())
763  AFI->setShouldRestoreSPFromFP(true);
764 }
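// For a small AAPCS function that saves r4, r5, r7 and lr and needs 24 bytes
// of locals, the path above produces roughly (illustrative, iOS-style r7
// frame pointer):
//
//   push  {r4, r5, r7, lr}     ; spill area 1
//   add   r7, sp, #8           ; point FP at the spilled FP/LR pair
//   sub   sp, sp, #24          ; allocate locals
//
// with the matching .cfi_def_cfa_offset / .cfi_offset directives interleaved.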
765 
766 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
767                                     MachineBasicBlock &MBB) const {
768  MachineFrameInfo &MFI = MF.getFrameInfo();
769   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
770   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
771  const ARMBaseInstrInfo &TII =
772  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
773  assert(!AFI->isThumb1OnlyFunction() &&
774  "This emitEpilogue does not support Thumb1!");
775  bool isARM = !AFI->isThumbFunction();
776 
777  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
778  int NumBytes = (int)MFI.getStackSize();
779  unsigned FramePtr = RegInfo->getFrameRegister(MF);
780 
781  // All calls are tail calls in GHC calling conv, and functions have no
782  // prologue/epilogue.
783   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
784     return;
785 
786   // First put ourselves on the first (from top) terminator instruction.
787   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
788   DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
789 
790  if (!AFI->hasStackFrame()) {
791  if (NumBytes - ArgRegsSaveSize != 0)
792  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
793  } else {
794  // Unwind MBBI to point to first LDR / VLDRD.
795  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
796  if (MBBI != MBB.begin()) {
797  do {
798  --MBBI;
799  } while (MBBI != MBB.begin() && isCSRestore(*MBBI, TII, CSRegs));
800  if (!isCSRestore(*MBBI, TII, CSRegs))
801  ++MBBI;
802  }
803 
804  // Move SP to start of FP callee save spill area.
805  NumBytes -= (ArgRegsSaveSize +
806                AFI->getGPRCalleeSavedArea1Size() +
807                AFI->getGPRCalleeSavedArea2Size() +
808                AFI->getDPRCalleeSavedGapSize() +
809                AFI->getDPRCalleeSavedAreaSize());
810 
811  // Reset SP based on frame pointer only if the stack frame extends beyond
812     // the frame pointer stack slot, or the target is ELF and the function has FP.
813  if (AFI->shouldRestoreSPFromFP()) {
814  NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
815  if (NumBytes) {
816  if (isARM)
817  emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
818  ARMCC::AL, 0, TII);
819  else {
820  // It's not possible to restore SP from FP in a single instruction.
821  // For iOS, this looks like:
822  // mov sp, r7
823  // sub sp, #24
824  // This is bad, if an interrupt is taken after the mov, sp is in an
825  // inconsistent state.
826  // Use the first callee-saved register as a scratch register.
827  assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
828  "No scratch register to restore SP from FP!");
829  emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
830  ARMCC::AL, 0, TII);
831  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
832  .addReg(ARM::R4)
833  .add(predOps(ARMCC::AL));
834  }
835  } else {
836  // Thumb2 or ARM.
837  if (isARM)
838  BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
839  .addReg(FramePtr)
841  .add(condCodeOp());
842  else
843  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
844  .addReg(FramePtr)
845  .add(predOps(ARMCC::AL));
846  }
847  } else if (NumBytes &&
848  !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
849  emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
850 
851  // Increment past our save areas.
852  if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
853  MBBI++;
854  // Since vpop register list cannot have gaps, there may be multiple vpop
855  // instructions in the epilogue.
856  while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
857  MBBI++;
858  }
859  if (AFI->getDPRCalleeSavedGapSize()) {
860  assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
861  "unexpected DPR alignment gap");
862  emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
863  }
864 
865  if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
866  if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
867  }
868 
869  if (ArgRegsSaveSize)
870  emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
871 }
872 
873 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
874 /// debug info. It's the same as what we use for resolving the code-gen
875 /// references for now. FIXME: This can go wrong when references are
876 /// SP-relative and simple call frames aren't used.
877 int
878 ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
879                                          unsigned &FrameReg) const {
880  return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
881 }
882 
883 int
884 ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
885                                              int FI, unsigned &FrameReg,
886  int SPAdj) const {
887  const MachineFrameInfo &MFI = MF.getFrameInfo();
888  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
889       MF.getSubtarget().getRegisterInfo());
890   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
891  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
892  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
893  bool isFixed = MFI.isFixedObjectIndex(FI);
894 
895  FrameReg = ARM::SP;
896  Offset += SPAdj;
897 
898  // SP can move around if there are allocas. We may also lose track of SP
899  // when emergency spilling inside a non-reserved call frame setup.
900  bool hasMovingSP = !hasReservedCallFrame(MF);
901 
902  // When dynamically realigning the stack, use the frame pointer for
903  // parameters, and the stack/base pointer for locals.
904  if (RegInfo->needsStackRealignment(MF)) {
905  assert(hasFP(MF) && "dynamic stack realignment without a FP!");
906  if (isFixed) {
907  FrameReg = RegInfo->getFrameRegister(MF);
908  Offset = FPOffset;
909  } else if (hasMovingSP) {
910  assert(RegInfo->hasBasePointer(MF) &&
911  "VLAs and dynamic stack alignment, but missing base pointer!");
912  FrameReg = RegInfo->getBaseRegister();
913  Offset -= SPAdj;
914  }
915  return Offset;
916  }
917 
918  // If there is a frame pointer, use it when we can.
919  if (hasFP(MF) && AFI->hasStackFrame()) {
920  // Use frame pointer to reference fixed objects. Use it for locals if
921  // there are VLAs (and thus the SP isn't reliable as a base).
922  if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
923  FrameReg = RegInfo->getFrameRegister(MF);
924  return FPOffset;
925  } else if (hasMovingSP) {
926  assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
927  if (AFI->isThumb2Function()) {
928  // Try to use the frame pointer if we can, else use the base pointer
929  // since it's available. This is handy for the emergency spill slot, in
930  // particular.
931  if (FPOffset >= -255 && FPOffset < 0) {
932  FrameReg = RegInfo->getFrameRegister(MF);
933  return FPOffset;
934  }
935  }
936  } else if (AFI->isThumbFunction()) {
937  // Prefer SP to base pointer, if the offset is suitably aligned and in
938  // range as the effective range of the immediate offset is bigger when
939  // basing off SP.
940  // Use add <rd>, sp, #<imm8>
941  // ldr <rd>, [sp, #<imm8>]
942  if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
943  return Offset;
944  // In Thumb2 mode, the negative offset is very limited. Try to avoid
945  // out of range references. ldr <rt>,[<rn>, #-<imm8>]
946  if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
947  FrameReg = RegInfo->getFrameRegister(MF);
948  return FPOffset;
949  }
950  } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
951  // Otherwise, use SP or FP, whichever is closer to the stack slot.
952  FrameReg = RegInfo->getFrameRegister(MF);
953  return FPOffset;
954  }
955  }
956  // Use the base pointer if we have one.
957  if (RegInfo->hasBasePointer(MF))
958  FrameReg = RegInfo->getBaseRegister();
959  return Offset;
960 }
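// Worked example: for an object at MFI offset -12 in a 32-byte frame with a
// FramePtrSpillOffset of 24, Offset = -12 + 32 = 20 and FPOffset = 20 - 24 = -4,
// so with an FP and a moving SP the slot is addressed as [fp, #-4]; otherwise
// it stays SP-relative as [sp, #20] (plus any SPAdj in flight).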
961 
962 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
963                                     MachineBasicBlock::iterator MI,
964                                     const std::vector<CalleeSavedInfo> &CSI,
965  unsigned StmOpc, unsigned StrOpc,
966  bool NoGap,
967  bool(*Func)(unsigned, bool),
968  unsigned NumAlignedDPRCS2Regs,
969  unsigned MIFlags) const {
970  MachineFunction &MF = *MBB.getParent();
971  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
972   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
973 
974  DebugLoc DL;
975 
976  using RegAndKill = std::pair<unsigned, bool>;
977 
978   SmallVector<RegAndKill, 4> Regs;
979   unsigned i = CSI.size();
980  while (i != 0) {
981  unsigned LastReg = 0;
982  for (; i != 0; --i) {
983  unsigned Reg = CSI[i-1].getReg();
984  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
985 
986  // D-registers in the aligned area DPRCS2 are NOT spilled here.
987  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
988  continue;
989 
990  const MachineRegisterInfo &MRI = MF.getRegInfo();
991  bool isLiveIn = MRI.isLiveIn(Reg);
992  if (!isLiveIn && !MRI.isReserved(Reg))
993  MBB.addLiveIn(Reg);
994  // If NoGap is true, push consecutive registers and then leave the rest
995  // for other instructions. e.g.
996  // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
997  if (NoGap && LastReg && LastReg != Reg-1)
998  break;
999  LastReg = Reg;
1000  // Do not set a kill flag on values that are also marked as live-in. This
1001   // happens with the @llvm.returnaddress intrinsic and with arguments
1002  // passed in callee saved registers.
1003  // Omitting the kill flags is conservatively correct even if the live-in
1004  // is not used after all.
1005  Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1006  }
1007 
1008  if (Regs.empty())
1009  continue;
1010 
1011  llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1012  return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1013  });
1014 
1015  if (Regs.size() > 1 || StrOpc== 0) {
1016  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1017  .addReg(ARM::SP)
1018  .setMIFlags(MIFlags)
1019  .add(predOps(ARMCC::AL));
1020  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1021  MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
1022  } else if (Regs.size() == 1) {
1023  BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1024  .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1025  .addReg(ARM::SP)
1026  .setMIFlags(MIFlags)
1027  .addImm(-4)
1028  .add(predOps(ARMCC::AL));
1029  }
1030  Regs.clear();
1031 
1032  // Put any subsequent vpush instructions before this one: they will refer to
1033  // higher register numbers so need to be pushed first in order to preserve
1034  // monotonicity.
1035  if (MI != MBB.begin())
1036  --MI;
1037  }
1038 }
1039 
1040 void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1041                                    MachineBasicBlock::iterator MI,
1042                                    std::vector<CalleeSavedInfo> &CSI,
1043  unsigned LdmOpc, unsigned LdrOpc,
1044  bool isVarArg, bool NoGap,
1045  bool(*Func)(unsigned, bool),
1046  unsigned NumAlignedDPRCS2Regs) const {
1047  MachineFunction &MF = *MBB.getParent();
1048  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1049   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
1050   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1051   DebugLoc DL;
1052  bool isTailCall = false;
1053  bool isInterrupt = false;
1054  bool isTrap = false;
1055  if (MBB.end() != MI) {
1056  DL = MI->getDebugLoc();
1057  unsigned RetOpcode = MI->getOpcode();
1058  isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
1059  isInterrupt =
1060  RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1061  isTrap =
1062  RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1063  RetOpcode == ARM::tTRAP;
1064  }
1065 
1066   SmallVector<unsigned, 4> Regs;
1067   unsigned i = CSI.size();
1068  while (i != 0) {
1069  unsigned LastReg = 0;
1070  bool DeleteRet = false;
1071  for (; i != 0; --i) {
1072  CalleeSavedInfo &Info = CSI[i-1];
1073  unsigned Reg = Info.getReg();
1074  if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1075 
1076  // The aligned reloads from area DPRCS2 are not inserted here.
1077  if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1078  continue;
1079 
1080  if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1081  !isTrap && STI.hasV5TOps()) {
1082  if (MBB.succ_empty()) {
1083  Reg = ARM::PC;
1084  // Fold the return instruction into the LDM.
1085  DeleteRet = true;
1086  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1087  // We 'restore' LR into PC so it is not live out of the return block:
1088  // Clear Restored bit.
1089  Info.setRestored(false);
1090  } else
1091  LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1092  }
1093 
1094  // If NoGap is true, pop consecutive registers and then leave the rest
1095  // for other instructions. e.g.
1096  // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1097  if (NoGap && LastReg && LastReg != Reg-1)
1098  break;
1099 
1100  LastReg = Reg;
1101  Regs.push_back(Reg);
1102  }
1103 
1104  if (Regs.empty())
1105  continue;
1106 
1107  llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1108  return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1109  });
1110 
1111  if (Regs.size() > 1 || LdrOpc == 0) {
1112  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1113  .addReg(ARM::SP)
1114  .add(predOps(ARMCC::AL));
1115  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1116  MIB.addReg(Regs[i], getDefRegState(true));
1117  if (DeleteRet) {
1118  if (MI != MBB.end()) {
1119  MIB.copyImplicitOps(*MI);
1120  MI->eraseFromParent();
1121  }
1122  }
1123  MI = MIB;
1124  } else if (Regs.size() == 1) {
1125  // If we adjusted the reg to PC from LR above, switch it back here. We
1126  // only do that for LDM.
1127  if (Regs[0] == ARM::PC)
1128  Regs[0] = ARM::LR;
1129  MachineInstrBuilder MIB =
1130  BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1131  .addReg(ARM::SP, RegState::Define)
1132  .addReg(ARM::SP);
1133  // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1134  // that refactoring is complete (eventually).
1135  if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1136  MIB.addReg(0);
1137       MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
1138     } else
1139  MIB.addImm(4);
1140  MIB.add(predOps(ARMCC::AL));
1141  }
1142  Regs.clear();
1143 
1144  // Put any subsequent vpop instructions after this one: they will refer to
1145  // higher register numbers so need to be popped afterwards.
1146  if (MI != MBB.end())
1147  ++MI;
1148  }
1149 }
1150 
1151 /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1152 /// starting from d8. Also insert stack realignment code and leave the stack
1153 /// pointer pointing to the d8 spill slot.
1154 static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
1155                                     MachineBasicBlock::iterator MI,
1156                                     unsigned NumAlignedDPRCS2Regs,
1157  const std::vector<CalleeSavedInfo> &CSI,
1158  const TargetRegisterInfo *TRI) {
1159  MachineFunction &MF = *MBB.getParent();
1160   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1161   DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1162  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1163  MachineFrameInfo &MFI = MF.getFrameInfo();
1164 
1165  // Mark the D-register spill slots as properly aligned. Since MFI computes
1166  // stack slot layout backwards, this can actually mean that the d-reg stack
1167  // slot offsets can be wrong. The offset for d8 will always be correct.
1168  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1169  unsigned DNum = CSI[i].getReg() - ARM::D8;
1170  if (DNum > NumAlignedDPRCS2Regs - 1)
1171  continue;
1172  int FI = CSI[i].getFrameIdx();
1173  // The even-numbered registers will be 16-byte aligned, the odd-numbered
1174  // registers will be 8-byte aligned.
1175  MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
1176 
1177  // The stack slot for D8 needs to be maximally aligned because this is
1178  // actually the point where we align the stack pointer. MachineFrameInfo
1179  // computes all offsets relative to the incoming stack pointer which is a
1180  // bit weird when realigning the stack. Any extra padding for this
1181  // over-alignment is not realized because the code inserted below adjusts
1182  // the stack pointer by numregs * 8 before aligning the stack pointer.
1183  if (DNum == 0)
1184  MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
1185  }
1186 
1187  // Move the stack pointer to the d8 spill slot, and align it at the same
1188  // time. Leave the stack slot address in the scratch register r4.
1189  //
1190  // sub r4, sp, #numregs * 8
1191  // bic r4, r4, #align - 1
1192  // mov sp, r4
1193  //
1194  bool isThumb = AFI->isThumbFunction();
1195  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1196  AFI->setShouldRestoreSPFromFP(true);
1197 
1198  // sub r4, sp, #numregs * 8
1199  // The immediate is <= 64, so it doesn't need any special encoding.
1200  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1201  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1202  .addReg(ARM::SP)
1203  .addImm(8 * NumAlignedDPRCS2Regs)
1204  .add(predOps(ARMCC::AL))
1205  .add(condCodeOp());
1206 
1207  unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment();
1208  // We must set parameter MustBeSingleInstruction to true, since
1209  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1210  // stack alignment. Luckily, this can always be done since all ARM
1211  // architecture versions that support Neon also support the BFC
1212  // instruction.
1213  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1214 
1215  // mov sp, r4
1216  // The stack pointer must be adjusted before spilling anything, otherwise
1217  // the stack slots could be clobbered by an interrupt handler.
1218  // Leave r4 live, it is used below.
1219  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1220  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1221  .addReg(ARM::R4)
1222  .add(predOps(ARMCC::AL));
1223  if (!isThumb)
1224  MIB.add(condCodeOp());
1225 
1226  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1227  // r4 holds the stack slot address.
1228  unsigned NextReg = ARM::D8;
1229 
1230  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1231  // The writeback is only needed when emitting two vst1.64 instructions.
1232  if (NumAlignedDPRCS2Regs >= 6) {
1233  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1234  &ARM::QQPRRegClass);
1235  MBB.addLiveIn(SupReg);
1236  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1237  .addReg(ARM::R4, RegState::Kill)
1238  .addImm(16)
1239  .addReg(NextReg)
1240  .addReg(SupReg, RegState::ImplicitKill)
1241  .add(predOps(ARMCC::AL));
1242  NextReg += 4;
1243  NumAlignedDPRCS2Regs -= 4;
1244  }
1245 
1246  // We won't modify r4 beyond this point. It currently points to the next
1247  // register to be spilled.
1248  unsigned R4BaseReg = NextReg;
1249 
1250  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1251  if (NumAlignedDPRCS2Regs >= 4) {
1252  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1253  &ARM::QQPRRegClass);
1254  MBB.addLiveIn(SupReg);
1255  BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1256  .addReg(ARM::R4)
1257  .addImm(16)
1258  .addReg(NextReg)
1259  .addReg(SupReg, RegState::ImplicitKill)
1260  .add(predOps(ARMCC::AL));
1261  NextReg += 4;
1262  NumAlignedDPRCS2Regs -= 4;
1263  }
1264 
1265  // 16-byte aligned vst1.64 with 2 d-regs.
1266  if (NumAlignedDPRCS2Regs >= 2) {
1267  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1268  &ARM::QPRRegClass);
1269  MBB.addLiveIn(SupReg);
1270  BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1271  .addReg(ARM::R4)
1272  .addImm(16)
1273  .addReg(SupReg)
1274  .add(predOps(ARMCC::AL));
1275  NextReg += 2;
1276  NumAlignedDPRCS2Regs -= 2;
1277  }
1278 
1279  // Finally, use a vanilla vstr.64 for the odd last register.
1280  if (NumAlignedDPRCS2Regs) {
1281  MBB.addLiveIn(NextReg);
1282  // vstr.64 uses addrmode5 which has an offset scale of 4.
1283  BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1284  .addReg(NextReg)
1285  .addReg(ARM::R4)
1286  .addImm((NextReg - R4BaseReg) * 2)
1287  .add(predOps(ARMCC::AL));
1288  }
1289 
1290  // The last spill instruction inserted should kill the scratch register r4.
1291  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1292 }
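// For NumAlignedDPRCS2Regs = 6 and a 16-byte aligned frame, the code above
// emits roughly:
//
//   sub  r4, sp, #48
//   bic  r4, r4, #15
//   mov  sp, r4
//   vst1.64 {d8, d9, d10, d11}, [r4]!   ; 4 regs with writeback
//   vst1.64 {d12, d13}, [r4]            ; remaining pair, kills r4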
1293 
1294 /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1295 /// iterator to the following instruction.
1296 static MachineBasicBlock::iterator
1297 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
1298                         unsigned NumAlignedDPRCS2Regs) {
1299  // sub r4, sp, #numregs * 8
1300  // bic r4, r4, #align - 1
1301  // mov sp, r4
1302  ++MI; ++MI; ++MI;
1303  assert(MI->mayStore() && "Expecting spill instruction");
1304 
1305  // These switches all fall through.
1306  switch(NumAlignedDPRCS2Regs) {
1307  case 7:
1308  ++MI;
1309  assert(MI->mayStore() && "Expecting spill instruction");
1310     LLVM_FALLTHROUGH;
1311   default:
1312  ++MI;
1313  assert(MI->mayStore() && "Expecting spill instruction");
1314     LLVM_FALLTHROUGH;
1315   case 1:
1316  case 2:
1317  case 4:
1318  assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
1319  ++MI;
1320  }
1321  return MI;
1322 }
1323 
1324 /// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1325 /// starting from d8. These instructions are assumed to execute while the
1326 /// stack is still aligned, unlike the code inserted by emitPopInst.
1327 static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
1328                                       MachineBasicBlock::iterator MI,
1329                                       unsigned NumAlignedDPRCS2Regs,
1330  const std::vector<CalleeSavedInfo> &CSI,
1331  const TargetRegisterInfo *TRI) {
1332  MachineFunction &MF = *MBB.getParent();
1333   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1334   DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1335  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1336 
1337  // Find the frame index assigned to d8.
1338  int D8SpillFI = 0;
1339  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
1340  if (CSI[i].getReg() == ARM::D8) {
1341  D8SpillFI = CSI[i].getFrameIdx();
1342  break;
1343  }
1344 
1345  // Materialize the address of the d8 spill slot into the scratch register r4.
1346  // This can be fairly complicated if the stack frame is large, so just use
1347  // the normal frame index elimination mechanism to do it. This code runs as
1348  // the initial part of the epilog where the stack and base pointers haven't
1349  // been changed yet.
1350  bool isThumb = AFI->isThumbFunction();
1351  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1352 
1353  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1354  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1355  .addFrameIndex(D8SpillFI)
1356  .addImm(0)
1357  .add(predOps(ARMCC::AL))
1358  .add(condCodeOp());
1359 
1360  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
1361  unsigned NextReg = ARM::D8;
1362 
1363  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
1364  if (NumAlignedDPRCS2Regs >= 6) {
1365  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1366  &ARM::QQPRRegClass);
1367  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1368  .addReg(ARM::R4, RegState::Define)
1369         .addReg(ARM::R4, RegState::Kill)
1370         .addImm(16)
1371         .addReg(SupReg, RegState::ImplicitDefine)
1372         .add(predOps(ARMCC::AL));
1373  NextReg += 4;
1374  NumAlignedDPRCS2Regs -= 4;
1375  }
1376 
1377  // We won't modify r4 beyond this point. It currently points to the next
1378  // register to be spilled.
1379  unsigned R4BaseReg = NextReg;
1380 
1381  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1382  if (NumAlignedDPRCS2Regs >= 4) {
1383  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1384  &ARM::QQPRRegClass);
1385  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1386  .addReg(ARM::R4)
1387  .addImm(16)
1388         .addReg(SupReg, RegState::ImplicitDefine)
1389         .add(predOps(ARMCC::AL));
1390  NextReg += 4;
1391  NumAlignedDPRCS2Regs -= 4;
1392  }
1393 
1394  // 16-byte aligned vld1.64 with 2 d-regs.
1395  if (NumAlignedDPRCS2Regs >= 2) {
1396  unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1397  &ARM::QPRRegClass);
1398  BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1399  .addReg(ARM::R4)
1400  .addImm(16)
1401  .add(predOps(ARMCC::AL));
1402  NextReg += 2;
1403  NumAlignedDPRCS2Regs -= 2;
1404  }
1405 
1406  // Finally, use a vanilla vldr.64 for the remaining odd register.
1407  if (NumAlignedDPRCS2Regs)
1408  BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1409  .addReg(ARM::R4)
1410  .addImm(2 * (NextReg - R4BaseReg))
1411  .add(predOps(ARMCC::AL));
1412 
1413   // The last reload instruction inserted should kill the scratch register r4.
1414  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1415 }
1416 
1417 bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
1418                                         MachineBasicBlock::iterator MI,
1419                                         const std::vector<CalleeSavedInfo> &CSI,
1420  const TargetRegisterInfo *TRI) const {
1421  if (CSI.empty())
1422  return false;
1423 
1424  MachineFunction &MF = *MBB.getParent();
1425   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1426 
1427  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1428  unsigned PushOneOpc = AFI->isThumbFunction() ?
1429  ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1430  unsigned FltOpc = ARM::VSTMDDB_UPD;
1431  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1432   emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
1433                MachineInstr::FrameSetup);
1434   emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
1435                MachineInstr::FrameSetup);
1436   emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
1437  NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1438 
1439  // The code above does not insert spill code for the aligned DPRCS2 registers.
1440  // The stack realignment code will be inserted between the push instructions
1441  // and these spills.
1442  if (NumAlignedDPRCS2Regs)
1443  emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1444 
1445  return true;
1446 }
1447 
1448 bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
1449                                         MachineBasicBlock::iterator MI,
1450                                         std::vector<CalleeSavedInfo> &CSI,
1451  const TargetRegisterInfo *TRI) const {
1452  if (CSI.empty())
1453  return false;
1454 
1455  MachineFunction &MF = *MBB.getParent();
1456   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1457   bool isVarArg = AFI->getArgRegsSaveSize() > 0;
1458  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1459 
1460  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
1461  // registers. Do that here instead.
1462  if (NumAlignedDPRCS2Regs)
1463  emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1464 
1465  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1466  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
1467  unsigned FltOpc = ARM::VLDMDIA_UPD;
1468  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
1469  NumAlignedDPRCS2Regs);
1470  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1471  &isARMArea2Register, 0);
1472  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1473  &isARMArea1Register, 0);
1474 
1475  return true;
1476 }
1477 
1478 // FIXME: Make generic?
1479 static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
1480  const ARMBaseInstrInfo &TII) {
1481  unsigned FnSize = 0;
1482  for (auto &MBB : MF) {
1483  for (auto &MI : MBB)
1484  FnSize += TII.getInstSizeInBytes(MI);
1485  }
1486  return FnSize;
1487 }
1488 
1489 /// estimateRSStackSizeLimit - Look at each instruction that references stack
1490 /// frames and return the stack size limit beyond which some of these
1491 /// instructions will require a scratch register during their expansion later.
1492 // FIXME: Move to TII?
1493 static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
1494                                          const TargetFrameLowering *TFI) {
1495  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1496  unsigned Limit = (1 << 12) - 1;
1497  for (auto &MBB : MF) {
1498  for (auto &MI : MBB) {
1499  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1500  if (!MI.getOperand(i).isFI())
1501  continue;
1502 
1503  // When using ADDri to get the address of a stack object, 255 is the
1504  // largest offset guaranteed to fit in the immediate offset.
1505  if (MI.getOpcode() == ARM::ADDri) {
1506  Limit = std::min(Limit, (1U << 8) - 1);
1507  break;
1508  }
1509 
1510  // Otherwise check the addressing mode.
1511  switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
1512  case ARMII::AddrMode3:
1513  case ARMII::AddrModeT2_i8:
1514  Limit = std::min(Limit, (1U << 8) - 1);
1515  break;
1516  case ARMII::AddrMode5:
1518       case ARMII::AddrModeT2_i8s4:
1519         Limit = std::min(Limit, ((1U << 8) - 1) * 4);
1520  break;
1521  case ARMII::AddrModeT2_i12:
1522  // i12 supports only positive offset so these will be converted to
1523  // i8 opcodes. See llvm::rewriteT2FrameIndex.
1524  if (TFI->hasFP(MF) && AFI->hasStackFrame())
1525  Limit = std::min(Limit, (1U << 8) - 1);
1526  break;
1527  case ARMII::AddrMode4:
1528  case ARMII::AddrMode6:
1529  // Addressing modes 4 & 6 (load/store) instructions can't encode an
1530  // immediate offset for stack references.
1531  return 0;
1532  default:
1533  break;
1534  }
1535  break; // At most one FI per instruction
1536  }
1537  }
1538  }
1539 
1540  return Limit;
1541 }
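// Examples: a function whose only stack references use AddrModeT2_i8 gets a
// 255-byte limit; a single VLDR (AddrMode5) caps it at 255 * 4 = 1020 bytes;
// any LDM/VLD1 stack reference (AddrMode4/6) forces the limit to 0, meaning a
// scratch register may be needed for any frame access.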
1542 
1543 // In functions that realign the stack, it can be an advantage to spill the
1544 // callee-saved vector registers after realigning the stack. The vst1 and vld1
1545 // instructions take alignment hints that can improve performance.
1546 static void
1547 checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
1548   MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
1549  if (!SpillAlignedNEONRegs)
1550  return;
1551 
1552  // Naked functions don't spill callee-saved registers.
1553   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
1554     return;
1555 
1556  // We are planning to use NEON instructions vst1 / vld1.
1557  if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
1558  return;
1559 
1560  // Don't bother if the default stack alignment is sufficiently high.
1561  if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8)
1562  return;
1563 
1564  // Aligned spills require stack realignment.
1565  if (!static_cast<const ARMBaseRegisterInfo *>(
1566  MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
1567  return;
1568 
1569  // We always spill contiguous d-registers starting from d8. Count how many
1570   // need spilling. The register allocator will almost always use the
1571  // callee-saved registers in order, but it can happen that there are holes in
1572  // the range. Registers above the hole will be spilled to the standard DPRCS
1573  // area.
1574  unsigned NumSpills = 0;
1575  for (; NumSpills < 8; ++NumSpills)
1576  if (!SavedRegs.test(ARM::D8 + NumSpills))
1577  break;
1578 
1579  // Don't do this for just one d-register. It's not worth it.
1580  if (NumSpills < 2)
1581  return;
1582 
1583  // Spill the first NumSpills D-registers after realigning the stack.
1584  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
1585 
1586  // A scratch register is required for the vst1 / vld1 instructions.
1587  SavedRegs.set(ARM::R4);
1588 }
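// Example: if d8-d11 are callee-saved in a function that needs 16-byte stack
// realignment on a NEON core, NumAlignedDPRCS2Regs becomes 4, those registers
// are later spilled with aligned vst1.64 after realignment, and r4 is added
// to SavedRegs as the scratch register.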
1589 
1590 void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
1591                                             BitVector &SavedRegs,
1592  RegScavenger *RS) const {
1593  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1594  // This tells PEI to spill the FP as if it is any other callee-save register
1595   // to take advantage of the eliminateFrameIndex machinery. This also ensures it
1596  // is spilled in the order specified by getCalleeSavedRegs() to make it easier
1597  // to combine multiple loads / stores.
1598  bool CanEliminateFrame = true;
1599  bool CS1Spilled = false;
1600  bool LRSpilled = false;
1601  unsigned NumGPRSpills = 0;
1602  unsigned NumFPRSpills = 0;
1603  SmallVector<unsigned, 4> UnspilledCS1GPRs;
1604  SmallVector<unsigned, 4> UnspilledCS2GPRs;
1605  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1606  MF.getSubtarget().getRegisterInfo());
1607  const ARMBaseInstrInfo &TII =
1608  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1609   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1610   MachineFrameInfo &MFI = MF.getFrameInfo();
1611   MachineRegisterInfo &MRI = MF.getRegInfo();
1612   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1613   (void)TRI; // Silence unused warning in non-assert builds.
1614  unsigned FramePtr = RegInfo->getFrameRegister(MF);
1615 
1616  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
1617  // scratch register. Also spill R4 if Thumb2 function has varsized objects,
1618  // since it's not always possible to restore sp from fp in a single
1619  // instruction.
1620  // FIXME: It will be better just to find spare register here.
1621  if (AFI->isThumb2Function() &&
1622  (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
1623  SavedRegs.set(ARM::R4);
1624 
1625  // If a stack probe will be emitted, spill R4 and LR, since they are
1626  // clobbered by the stack probe call.
1627  // This estimate should be a safe, conservative estimate. The actual
1628  // stack probe is enabled based on the size of the local objects;
1629  // this estimate also includes the varargs store size.
1630  if (STI.isTargetWindows() &&
1631  WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
1632  SavedRegs.set(ARM::R4);
1633  SavedRegs.set(ARM::LR);
1634  }
1635 
1636  if (AFI->isThumb1OnlyFunction()) {
1637  // Spill LR if Thumb1 function uses variable length argument lists.
1638  if (AFI->getArgRegsSaveSize() > 0)
1639  SavedRegs.set(ARM::LR);
1640 
1641  // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
1642  // requires stack alignment. We don't know for sure what the stack size
1643  // will be, but for this, an estimate is good enough. If anything changes it,
1644  // it'll be a spill, which implies we've used all the registers and so R4 is
1645  // already used, so not marking it here will be OK.
1646  // FIXME: It would be better just to find a spare register here.
1647  if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) ||
1648  MFI.estimateStackSize(MF) > 508)
1649  SavedRegs.set(ARM::R4);
1650  }
1651 
1652  // See if we can spill vector registers to aligned stack.
1653  checkNumAlignedDPRCS2Regs(MF, SavedRegs);
1654 
1655  // Spill the BasePtr if it's used.
1656  if (RegInfo->hasBasePointer(MF))
1657  SavedRegs.set(RegInfo->getBaseRegister());
1658 
1659  // Don't spill FP if the frame can be eliminated. This is determined
1660  // by scanning the callee-save registers to see if any is modified.
1661  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
1662  for (unsigned i = 0; CSRegs[i]; ++i) {
1663  unsigned Reg = CSRegs[i];
1664  bool Spilled = false;
1665  if (SavedRegs.test(Reg)) {
1666  Spilled = true;
1667  CanEliminateFrame = false;
1668  }
1669 
1670  if (!ARM::GPRRegClass.contains(Reg)) {
1671  if (Spilled) {
1672  if (ARM::SPRRegClass.contains(Reg))
1673  NumFPRSpills++;
1674  else if (ARM::DPRRegClass.contains(Reg))
1675  NumFPRSpills += 2;
1676  else if (ARM::QPRRegClass.contains(Reg))
1677  NumFPRSpills += 4;
1678  }
1679  continue;
1680  }
1681 
1682  if (Spilled) {
1683  NumGPRSpills++;
1684 
1685  if (!STI.splitFramePushPop(MF)) {
1686  if (Reg == ARM::LR)
1687  LRSpilled = true;
1688  CS1Spilled = true;
1689  continue;
1690  }
1691 
1692  // Keep track of whether LR and any of R4, R5, R6, and R7 are spilled.
1693  switch (Reg) {
1694  case ARM::LR:
1695  LRSpilled = true;
1696  LLVM_FALLTHROUGH;
1697  case ARM::R0: case ARM::R1:
1698  case ARM::R2: case ARM::R3:
1699  case ARM::R4: case ARM::R5:
1700  case ARM::R6: case ARM::R7:
1701  CS1Spilled = true;
1702  break;
1703  default:
1704  break;
1705  }
1706  } else {
1707  if (!STI.splitFramePushPop(MF)) {
1708  UnspilledCS1GPRs.push_back(Reg);
1709  continue;
1710  }
1711 
1712  switch (Reg) {
1713  case ARM::R0: case ARM::R1:
1714  case ARM::R2: case ARM::R3:
1715  case ARM::R4: case ARM::R5:
1716  case ARM::R6: case ARM::R7:
1717  case ARM::LR:
1718  UnspilledCS1GPRs.push_back(Reg);
1719  break;
1720  default:
1721  UnspilledCS2GPRs.push_back(Reg);
1722  break;
1723  }
1724  }
1725  }
1726 
1727  bool ForceLRSpill = false;
1728  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
1729  unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
1730  // Force LR to be spilled if the Thumb function size is >= 2048. This enables
1731  // the use of BL to implement a far jump. If it turns out that it's not needed
1732  // then the branch fix-up path will undo it.
1733  if (FnSize >= (1 << 11)) {
1734  CanEliminateFrame = false;
1735  ForceLRSpill = true;
1736  }
1737  }
1738 
1739  // If any of the stack slot references may be out of range of an immediate
1740  // offset, make sure a register (or a spill slot) is available for the
1741  // register scavenger. Note that if we're indexing off the frame pointer, the
1742  // effective stack size is 4 bytes larger since the FP points to the stack
1743  // slot of the previous FP. Also, if we have variable sized objects in the
1744  // function, stack slot references will often be negative, and some of
1745  // our instructions are positive-offset only, so conservatively consider
1746  // that case to want a spill slot (or register) as well. Similarly, if
1747  // the function adjusts the stack pointer during execution and the
1748  // adjustments aren't already part of our stack size estimate, our offset
1749  // calculations may be off, so be conservative.
1750  // FIXME: We could add logic to be more precise about negative offsets
1751  // and which instructions will need a scratch register for them. Is it
1752  // worth the effort and added fragility?
1753  unsigned EstimatedStackSize =
1754  MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
1755 
1756  // Determine biggest (positive) SP offset in MachineFrameInfo.
1757  int MaxFixedOffset = 0;
1758  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
1759  int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
1760  MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
1761  }
1762 
1763  bool HasFP = hasFP(MF);
1764  if (HasFP) {
1765  if (AFI->hasStackFrame())
1766  EstimatedStackSize += 4;
1767  } else {
1768  // If FP is not used, SP will be used to access arguments, so count the
1769  // size of arguments into the estimation.
1770  EstimatedStackSize += MaxFixedOffset;
1771  }
1772  EstimatedStackSize += 16; // For possible paddings.
1773 
1774  unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
1775  int MaxFPOffset = getMaxFPOffset(MF.getFunction(), *AFI);
1776  bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit ||
1777  MFI.hasVarSizedObjects() ||
1778  (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) ||
1779  // For large argument stacks, FP-relative addressing may overflow.
1780  (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit);
1781  if (BigFrameOffsets ||
1782  !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
1783  AFI->setHasStackFrame(true);
1784 
1785  if (HasFP) {
1786  SavedRegs.set(FramePtr);
1787  // If the frame pointer is required by the ABI, also spill LR so that we
1788  // emit a complete frame record.
1789  if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
1790  SavedRegs.set(ARM::LR);
1791  LRSpilled = true;
1792  NumGPRSpills++;
1793  auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
1794  if (LRPos != UnspilledCS1GPRs.end())
1795  UnspilledCS1GPRs.erase(LRPos);
1796  }
1797  auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
1798  if (FPPos != UnspilledCS1GPRs.end())
1799  UnspilledCS1GPRs.erase(FPPos);
1800  NumGPRSpills++;
1801  if (FramePtr == ARM::R7)
1802  CS1Spilled = true;
1803  }
1804 
1805  // This is true when we inserted a spill for an unused register that can now
1806  // be used for register scavenging.
1807  bool ExtraCSSpill = false;
1808 
1809  if (AFI->isThumb1OnlyFunction()) {
1810  // For Thumb1-only targets, we need some low registers when we save and
1811  // restore the high registers (which aren't allocatable, but could be
1812  // used by inline assembly) because the push/pop instructions can not
1813  // access high registers. If necessary, we might need to push more low
1814  // registers to ensure that there is at least one free that can be used
1815  // for the saving & restoring, and preferably we should ensure that as
1816  // many as are needed are available so that fewer push/pop instructions
1817  // are required.
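  // Worked example (illustrative, not from the LLVM sources): if r2 and r3
  // are not live-in arguments and the function returns one value in r0, then
  // EntryRegDeficit ends up at -2 and ExitRegDeficit at 1 - 4 = -3, so
  // RegDeficit = max(-2, -3) = -2. Each saved low register (r4-r7) below
  // subtracts one more, each saved high register (r8-r11) adds one, and extra
  // low registers are only pushed while the final deficit stays positive.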
1818 
1819  // Low registers which are not currently pushed, but could be (r4-r7).
1820  SmallVector<unsigned, 4> AvailableRegs;
1821 
1822  // Unused argument registers (r0-r3) can be clobbered in the prologue for
1823  // free.
1824  int EntryRegDeficit = 0;
1825  for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
1826  if (!MF.getRegInfo().isLiveIn(Reg)) {
1827  --EntryRegDeficit;
1828  LLVM_DEBUG(dbgs()
1829  << printReg(Reg, TRI)
1830  << " is unused argument register, EntryRegDeficit = "
1831  << EntryRegDeficit << "\n");
1832  }
1833  }
1834 
1835  // Unused return registers can be clobbered in the epilogue for free.
1836  int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
1837  LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
1838  << " return regs used, ExitRegDeficit = "
1839  << ExitRegDeficit << "\n");
1840 
1841  int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
1842  LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
1843 
1844  // r4-r6 can be used in the prologue if they are pushed by the first push
1845  // instruction.
1846  for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
1847  if (SavedRegs.test(Reg)) {
1848  --RegDeficit;
1849  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
1850  << " is saved low register, RegDeficit = "
1851  << RegDeficit << "\n");
1852  } else {
1853  AvailableRegs.push_back(Reg);
1854  LLVM_DEBUG(
1855  dbgs()
1856  << printReg(Reg, TRI)
1857  << " is non-saved low register, adding to AvailableRegs\n");
1858  }
1859  }
1860 
1861  // r7 can be used if it is not being used as the frame pointer.
1862  if (!HasFP) {
1863  if (SavedRegs.test(ARM::R7)) {
1864  --RegDeficit;
1865  LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
1866  << RegDeficit << "\n");
1867  } else {
1868  AvailableRegs.push_back(ARM::R7);
1869  LLVM_DEBUG(
1870  dbgs()
1871  << "%r7 is non-saved low register, adding to AvailableRegs\n");
1872  }
1873  }
1874 
1875  // Each of r8-r11 needs to be copied to a low register, then pushed.
1876  for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
1877  if (SavedRegs.test(Reg)) {
1878  ++RegDeficit;
1879  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
1880  << " is saved high register, RegDeficit = "
1881  << RegDeficit << "\n");
1882  }
1883  }
1884 
1885  // LR can only be used by PUSH, not POP, and can't be used at all if the
1886  // llvm.returnaddress intrinsic is used. This is only worth doing if we
1887  // are more limited at function entry than exit.
1888  if ((EntryRegDeficit > ExitRegDeficit) &&
1889  !(MF.getRegInfo().isLiveIn(ARM::LR) &&
1890  MF.getFrameInfo().isReturnAddressTaken())) {
1891  if (SavedRegs.test(ARM::LR)) {
1892  --RegDeficit;
1893  LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
1894  << RegDeficit << "\n");
1895  } else {
1896  AvailableRegs.push_back(ARM::LR);
1897  LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
1898  }
1899  }
1900 
1901  // If there are more high registers that need pushing than low registers
1902  // available, push some more low registers so that we can use fewer push
1903  // instructions. This might not reduce RegDeficit all the way to zero,
1904  // because we can only guarantee that r4-r6 are available, but r8-r11 may
1905  // need saving.
1906  LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
1907  for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
1908  unsigned Reg = AvailableRegs.pop_back_val();
1909  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1910  << " to make up reg deficit\n");
1911  SavedRegs.set(Reg);
1912  NumGPRSpills++;
1913  CS1Spilled = true;
1914  assert(!MRI.isReserved(Reg) && "Should not be reserved");
1915  if (!MRI.isPhysRegUsed(Reg))
1916  ExtraCSSpill = true;
1917  UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
1918  if (Reg == ARM::LR)
1919  LRSpilled = true;
1920  }
1921  LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
1922  << "\n");
1923  }
1924 
1925  // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
1926  // restore LR in that case.
1927  bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
1928 
1929  // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
1930  // spill LR as well so we can fold BX_RET into the register restore (LDM).
1931  if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
1932  SavedRegs.set(ARM::LR);
1933  NumGPRSpills++;
1934  SmallVectorImpl<unsigned>::iterator LRPos;
1935  LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
1936  if (LRPos != UnspilledCS1GPRs.end())
1937  UnspilledCS1GPRs.erase(LRPos);
1938 
1939  ForceLRSpill = false;
1940  if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR))
1941  ExtraCSSpill = true;
1942  }
1943 
1944  // If stack and double are 8-byte aligned and we are spilling an odd number
1945  // of GPRs, spill one extra callee save GPR so we won't have to pad between
1946  // the integer and double callee save areas.
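  // Worked example (illustrative, not from the LLVM sources): with an 8-byte
  // stack alignment, pushing {r4, r5, r6} occupies 12 bytes, so the d-register
  // save area that follows would need 4 bytes of padding. Spilling one more
  // callee-saved GPR makes the GPR area 16 bytes and removes the gap, which is
  // why an odd NumGPRSpills triggers the extra spill below.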
1947  LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
1948  unsigned TargetAlign = getStackAlignment();
1949  if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
1950  if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
1951  for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
1952  unsigned Reg = UnspilledCS1GPRs[i];
1953  // Don't spill a high register if the function is Thumb. In the case of
1954  // Windows on ARM, accept R11 (the frame pointer).
1955  if (!AFI->isThumbFunction() ||
1956  (STI.isTargetWindows() && Reg == ARM::R11) ||
1957  isARMLowRegister(Reg) ||
1958  (Reg == ARM::LR && !ExpensiveLRRestore)) {
1959  SavedRegs.set(Reg);
1960  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1961  << " to make up alignment\n");
1962  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
1963  ExtraCSSpill = true;
1964  break;
1965  }
1966  }
1967  } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
1968  unsigned Reg = UnspilledCS2GPRs.front();
1969  SavedRegs.set(Reg);
1970  LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
1971  << " to make up alignment\n");
1972  if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
1973  ExtraCSSpill = true;
1974  }
1975  }
1976 
1977  // Estimate if we might need to scavenge a register at some point in order
1978  // to materialize a stack offset. If so, either spill one additional
1979  // callee-saved register or reserve a special spill slot to facilitate
1980  // register scavenging. Thumb1 needs a spill slot for stack pointer
1981  // adjustments also, even when the frame itself is small.
1982  if (BigFrameOffsets && !ExtraCSSpill) {
1983  // If any non-reserved CS register isn't spilled, just spill one or two
1984  // extra. That should take care of it!
1985  unsigned NumExtras = TargetAlign / 4;
1986  SmallVector<unsigned, 2> Extras;
1987  while (NumExtras && !UnspilledCS1GPRs.empty()) {
1988  unsigned Reg = UnspilledCS1GPRs.back();
1989  UnspilledCS1GPRs.pop_back();
1990  if (!MRI.isReserved(Reg) &&
1991  (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
1992  Reg == ARM::LR)) {
1993  Extras.push_back(Reg);
1994  NumExtras--;
1995  }
1996  }
1997  // For non-Thumb1 functions, also check for hi-reg CS registers
1998  if (!AFI->isThumb1OnlyFunction()) {
1999  while (NumExtras && !UnspilledCS2GPRs.empty()) {
2000  unsigned Reg = UnspilledCS2GPRs.back();
2001  UnspilledCS2GPRs.pop_back();
2002  if (!MRI.isReserved(Reg)) {
2003  Extras.push_back(Reg);
2004  NumExtras--;
2005  }
2006  }
2007  }
2008  if (NumExtras == 0) {
2009  for (unsigned Reg : Extras) {
2010  SavedRegs.set(Reg);
2011  if (!MRI.isPhysRegUsed(Reg))
2012  ExtraCSSpill = true;
2013  }
2014  }
2015  if (!ExtraCSSpill && !AFI->isThumb1OnlyFunction()) {
2016  // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
2017  // closest to SP or frame pointer.
2018  assert(RS && "Register scavenging not provided");
2019  const TargetRegisterClass &RC = ARM::GPRRegClass;
2020  unsigned Size = TRI->getSpillSize(RC);
2021  unsigned Align = TRI->getSpillAlignment(RC);
2022  RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2023  }
2024  }
2025  }
2026 
2027  if (ForceLRSpill) {
2028  SavedRegs.set(ARM::LR);
2029  AFI->setLRIsSpilledForFarJump(true);
2030  }
2031 }
2032 
2033 MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
2034  MachineFunction &MF, MachineBasicBlock &MBB,
2035  MachineBasicBlock::iterator I) const {
2036  const ARMBaseInstrInfo &TII =
2037  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2038  if (!hasReservedCallFrame(MF)) {
2039  // If we have alloca, convert as follows:
2040  // ADJCALLSTACKDOWN -> sub, sp, sp, amount
2041  // ADJCALLSTACKUP -> add, sp, sp, amount
2042  MachineInstr &Old = *I;
2043  DebugLoc dl = Old.getDebugLoc();
2044  unsigned Amount = TII.getFrameSize(Old);
2045  if (Amount != 0) {
2046  // We need to keep the stack aligned properly. To do this, we round the
2047  // amount of space needed for the outgoing arguments up to the next
2048  // alignment boundary.
2049  Amount = alignSPAdjust(Amount);
2050 
2051  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2052  assert(!AFI->isThumb1OnlyFunction() &&
2053  "This eliminateCallFramePseudoInstr does not support Thumb1!");
2054  bool isARM = !AFI->isThumbFunction();
2055 
2056  // Replace the pseudo instruction with a new instruction...
2057  unsigned Opc = Old.getOpcode();
2058  int PIdx = Old.findFirstPredOperandIdx();
2059  ARMCC::CondCodes Pred =
2060  (PIdx == -1) ? ARMCC::AL
2061  : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm();
2062  unsigned PredReg = TII.getFramePred(Old);
2063  if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2064  emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
2065  Pred, PredReg);
2066  } else {
2067  assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2068  emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
2069  Pred, PredReg);
2070  }
2071  }
2072  }
2073  return MBB.erase(I);
2074 }
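
// Illustrative note (not from the LLVM sources): without a reserved call
// frame, the code above lowers the call-frame pseudos roughly as
//
//   ADJCALLSTACKDOWN 12, ...  ->  sub sp, sp, #16
//   ... call ...
//   ADJCALLSTACKUP 12, ...    ->  add sp, sp, #16
//
// where the 12-byte request is first rounded up by alignSPAdjust() to the
// 8-byte stack alignment assumed in this example.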
2075 
2076 /// Get the minimum constant for ARM that is greater than or equal to the
2077 /// argument. In ARM, constants can have any value that can be produced by
2078 /// rotating an 8-bit value to the right by an even number of bits within a
2079 /// 32-bit word.
2080 static uint32_t alignToARMConstant(uint32_t Value) {
2081  unsigned Shifted = 0;
2082 
2083  if (Value == 0)
2084  return 0;
2085 
2086  while (!(Value & 0xC0000000)) {
2087  Value = Value << 2;
2088  Shifted += 2;
2089  }
2090 
2091  bool Carry = (Value & 0x00FFFFFF);
2092  Value = ((Value & 0xFF000000) >> 24) + Carry;
2093 
2094  if (Value & 0x0000100)
2095  Value = Value & 0x000001FC;
2096 
2097  if (Shifted > 24)
2098  Value = Value >> (Shifted - 24);
2099  else
2100  Value = Value << (24 - Shifted);
2101 
2102  return Value;
2103 }
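
// Worked example (illustrative, not from the LLVM sources): the routine keeps
// the top eight significant bits of the value and rounds up, yielding an ARM
// modified immediate (an 8-bit value rotated right by an even amount). For
// Value = 0x1234:
//
//   shift left until one of the top two bits is set: 0x1234 << 18 = 0x48D00000
//   the low 24 bits are non-zero, so round up:        0x48 + 1    = 0x49
//   shift back into place:                            0x49 << 6   = 0x1240
//
// so alignToARMConstant(0x1234) == 0x1240, the smallest encodable constant
// greater than or equal to 0x1234.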
2104 
2105 // The stack limit in the TCB is set to this many bytes above the actual
2106 // stack limit.
2107 static const uint64_t kSplitStackAvailable = 256;
2108 
2109 // Adjust the function prologue to enable split stacks. This currently only
2110 // supports android and linux.
2111 //
2112 // The ABI of the segmented stack prologue is a little arbitrarily chosen, but
2113 // must be well defined in order to allow for consistent implementations of the
2114 // __morestack helper function. The ABI is also not a normal ABI in that it
2115 // doesn't follow the normal calling conventions because this allows the
2116 // prologue of each function to be optimized further.
2117 //
2118 // Currently, the ABI looks like (when calling __morestack)
2119 //
2120 // * r4 holds the minimum stack size requested for this function call
2121 // * r5 holds the stack size of the arguments to the function
2122 // * the beginning of the function is 3 instructions after the call to
2123 // __morestack
2124 //
2125 // Implementations of __morestack should use r4 to allocate a new stack, r5 to
2126 // place the arguments on to the new stack, and the 3-instruction knowledge to
2127 // jump directly to the body of the function when working on the new stack.
2128 //
2129 // An old (and possibly no longer compatible) implementation of __morestack for
2130 // ARM can be found at [1].
2131 //
2132 // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
2133 void ARMFrameLowering::adjustForSegmentedStacks(
2134  MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2135  unsigned Opcode;
2136  unsigned CFIIndex;
2137  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
2138  bool Thumb = ST->isThumb();
2139 
2140  // Sadly, this currently doesn't support varargs or platforms other than
2141  // Android/Linux. Note that Thumb1/Thumb2 are supported on Android/Linux.
2142  if (MF.getFunction().isVarArg())
2143  report_fatal_error("Segmented stacks do not support vararg functions.");
2144  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
2145  report_fatal_error("Segmented stacks not supported on this platform.");
2146 
2147  MachineFrameInfo &MFI = MF.getFrameInfo();
2148  MachineModuleInfo &MMI = MF.getMMI();
2149  MCContext &Context = MMI.getContext();
2150  const MCRegisterInfo *MRI = Context.getRegisterInfo();
2151  const ARMBaseInstrInfo &TII =
2152  *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2153  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
2154  DebugLoc DL;
2155 
2156  uint64_t StackSize = MFI.getStackSize();
2157 
2158  // Do not generate a prologue for leaf functions with a stack of size zero.
2159  // For non-leaf functions we have to allow for the possibility that the
2160  // call is to a non-split function, as in PR37807. This function could also
2161  // take the address of a non-split function. When the linker tries to adjust
2162  // its non-existent prologue, it would fail with an error. Mark the object
2163  // file so that such failures are not errors. See this Go language bug-report
2164  // https://go-review.googlesource.com/c/go/+/148819/
2165  if (StackSize == 0 && !MFI.hasTailCall()) {
2166  MF.getMMI().setHasNosplitStack(true);
2167  return;
2168  }
2169 
2170  // Use R4 and R5 as scratch registers.
2171  // We save R4 and R5 before use and restore them before leaving the function.
2172  unsigned ScratchReg0 = ARM::R4;
2173  unsigned ScratchReg1 = ARM::R5;
2174  uint64_t AlignedStackSize;
2175 
2176  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
2177  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
2178  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
2179  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
2180  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
2181 
2182  // Grab everything that reaches PrologueMBB to update their liveness as well.
2183  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
2184  SmallVector<MachineBasicBlock *, 2> WalkList;
2185  WalkList.push_back(&PrologueMBB);
2186 
2187  do {
2188  MachineBasicBlock *CurMBB = WalkList.pop_back_val();
2189  for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
2190  if (BeforePrologueRegion.insert(PredBB).second)
2191  WalkList.push_back(PredBB);
2192  }
2193  } while (!WalkList.empty());
2194 
2195  // The order in that list is important.
2196  // The blocks will all be inserted before PrologueMBB using that order.
2197  // Therefore the block that should appear first in the CFG should appear
2198  // first in the list.
2199  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
2200  PostStackMBB};
2201 
2202  for (MachineBasicBlock *B : AddedBlocks)
2203  BeforePrologueRegion.insert(B);
2204 
2205  for (const auto &LI : PrologueMBB.liveins()) {
2206  for (MachineBasicBlock *PredBB : BeforePrologueRegion)
2207  PredBB->addLiveIn(LI);
2208  }
2209 
2210  // Remove the newly added blocks from the list, since we know
2211  // we do not have to do the following updates for them.
2212  for (MachineBasicBlock *B : AddedBlocks) {
2213  BeforePrologueRegion.erase(B);
2214  MF.insert(PrologueMBB.getIterator(), B);
2215  }
2216 
2217  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
2218  // Make sure the LiveIns are still sorted and unique.
2219  MBB->sortUniqueLiveIns();
2220  // Replace the edges to PrologueMBB by edges to the sequences
2221  // we are about to add.
2222  MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
2223  }
2224 
2225  // The required stack size, aligned to the ARM constant criterion.
2226  AlignedStackSize = alignToARMConstant(StackSize);
2227 
2228  // When the frame size is less than 256 we just compare the stack
2229  // boundary directly to the value of the stack pointer, per gcc.
2230  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
2231 
2232  // We will use two of the callee save registers as scratch registers so we
2233  // need to save those registers onto the stack.
2234  // We will use SR0 to hold stack limit and SR1 to hold the stack size
2235  // requested and arguments for __morestack().
2236  // SR0: Scratch Register #0
2237  // SR1: Scratch Register #1
2238  // push {SR0, SR1}
2239  if (Thumb) {
2240  BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
2241  .add(predOps(ARMCC::AL))
2242  .addReg(ScratchReg0)
2243  .addReg(ScratchReg1);
2244  } else {
2245  BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
2246  .addReg(ARM::SP, RegState::Define)
2247  .addReg(ARM::SP)
2248  .add(predOps(ARMCC::AL))
2249  .addReg(ScratchReg0)
2250  .addReg(ScratchReg1);
2251  }
2252 
2253  // Emit the relevant DWARF information about the change in stack pointer as
2254  // well as where to find both r4 and r5 (the callee-save registers)
2255  CFIIndex =
2256  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
2257  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2258  .addCFIIndex(CFIIndex);
2259  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2260  nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
2261  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2262  .addCFIIndex(CFIIndex);
2263  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2264  nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
2265  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2266  .addCFIIndex(CFIIndex);
2267 
2268  // mov SR1, sp
2269  if (Thumb) {
2270  BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
2271  .addReg(ARM::SP)
2272  .add(predOps(ARMCC::AL));
2273  } else if (CompareStackPointer) {
2274  BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
2275  .addReg(ARM::SP)
2276  .add(predOps(ARMCC::AL))
2277  .add(condCodeOp());
2278  }
2279 
2280  // sub SR1, sp, #StackSize
2281  if (!CompareStackPointer && Thumb) {
2282  BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
2283  .add(condCodeOp())
2284  .addReg(ScratchReg1)
2285  .addImm(AlignedStackSize)
2286  .add(predOps(ARMCC::AL));
2287  } else if (!CompareStackPointer) {
2288  BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
2289  .addReg(ARM::SP)
2290  .addImm(AlignedStackSize)
2291  .add(predOps(ARMCC::AL))
2292  .add(condCodeOp());
2293  }
2294 
2295  if (Thumb && ST->isThumb1Only()) {
2296  unsigned PCLabelId = ARMFI->createPICLabelUId();
2297  ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
2298  MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
2299  MachineConstantPool *MCP = MF.getConstantPool();
2300  unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
2301 
2302  // ldr SR0, [pc, offset(STACK_LIMIT)]
2303  BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
2304  .addConstantPoolIndex(CPI)
2305  .add(predOps(ARMCC::AL));
2306 
2307  // ldr SR0, [SR0]
2308  BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
2309  .addReg(ScratchReg0)
2310  .addImm(0)
2311  .add(predOps(ARMCC::AL));
2312  } else {
2313  // Get TLS base address from the coprocessor
2314  // mrc p15, #0, SR0, c13, c0, #3
2315  BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
2316  .addImm(15)
2317  .addImm(0)
2318  .addImm(13)
2319  .addImm(0)
2320  .addImm(3)
2321  .add(predOps(ARMCC::AL));
2322 
2323  // Use the last TLS slot on Android and a private field of the TCB on Linux.
2324  assert(ST->isTargetAndroid() || ST->isTargetLinux());
2325  unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
2326 
2327  // Get the stack limit from the right offset
2328  // ldr SR0, [sr0, #4 * TlsOffset]
2329  BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
2330  .addReg(ScratchReg0)
2331  .addImm(4 * TlsOffset)
2332  .add(predOps(ARMCC::AL));
2333  }
2334 
2335  // Compare stack limit with stack size requested.
2336  // cmp SR0, SR1
2337  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
2338  BuildMI(GetMBB, DL, TII.get(Opcode))
2339  .addReg(ScratchReg0)
2340  .addReg(ScratchReg1)
2341  .add(predOps(ARMCC::AL));
2342 
2343  // This jump is taken if StackLimit < SP - stack required.
2344  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
2345  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
2346  .addImm(ARMCC::LO)
2347  .addReg(ARM::CPSR);
2348 
2349 
2350  // Calling __morestack(StackSize, Size of stack arguments).
2351  // __morestack knows that the stack size requested is in SR0(r4)
2352  // and the size of the stack arguments is in SR1(r5).
2353 
2354  // Pass the first argument to __morestack in Scratch Register #0:
2355  // the amount of stack required.
2356  if (Thumb) {
2357  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
2358  .add(condCodeOp())
2359  .addImm(AlignedStackSize)
2360  .add(predOps(ARMCC::AL));
2361  } else {
2362  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
2363  .addImm(AlignedStackSize)
2364  .add(predOps(ARMCC::AL))
2365  .add(condCodeOp());
2366  }
2367  // Pass the second argument to __morestack in Scratch Register #1:
2368  // the amount of stack consumed to save the function arguments.
2369  if (Thumb) {
2370  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
2371  .add(condCodeOp())
2372  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2373  .add(predOps(ARMCC::AL));
2374  } else {
2375  BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
2376  .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
2377  .add(predOps(ARMCC::AL))
2378  .add(condCodeOp());
2379  }
2380 
2381  // push {lr} - Save return address of this function.
2382  if (Thumb) {
2383  BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
2384  .add(predOps(ARMCC::AL))
2385  .addReg(ARM::LR);
2386  } else {
2387  BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
2388  .addReg(ARM::SP, RegState::Define)
2389  .addReg(ARM::SP)
2390  .add(predOps(ARMCC::AL))
2391  .addReg(ARM::LR);
2392  }
2393 
2394  // Emit the DWARF info about the change in stack as well as where to find the
2395  // previous link register
2396  CFIIndex =
2397  MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
2398  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2399  .addCFIIndex(CFIIndex);
2400  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
2401  nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
2402  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2403  .addCFIIndex(CFIIndex);
2404 
2405  // Call __morestack().
2406  if (Thumb) {
2407  BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
2408  .add(predOps(ARMCC::AL))
2409  .addExternalSymbol("__morestack");
2410  } else {
2411  BuildMI(AllocMBB, DL, TII.get(ARM::BL))
2412  .addExternalSymbol("__morestack");
2413  }
2414 
2415  // pop {lr} - Restore return address of this original function.
2416  if (Thumb) {
2417  if (ST->isThumb1Only()) {
2418  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2419  .add(predOps(ARMCC::AL))
2420  .addReg(ScratchReg0);
2421  BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
2422  .addReg(ScratchReg0)
2423  .add(predOps(ARMCC::AL));
2424  } else {
2425  BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
2426  .addReg(ARM::LR, RegState::Define)
2427  .addReg(ARM::SP, RegState::Define)
2428  .addReg(ARM::SP)
2429  .addImm(4)
2430  .add(predOps(ARMCC::AL));
2431  }
2432  } else {
2433  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2434  .addReg(ARM::SP, RegState::Define)
2435  .addReg(ARM::SP)
2436  .add(predOps(ARMCC::AL))
2437  .addReg(ARM::LR);
2438  }
2439 
2440  // Restore SR0 and SR1 in case __morestack() was called.
2441  // __morestack() will skip PostStackMBB block so we need to restore
2442  // scratch registers from here.
2443  // pop {SR0, SR1}
2444  if (Thumb) {
2445  BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
2446  .add(predOps(ARMCC::AL))
2447  .addReg(ScratchReg0)
2448  .addReg(ScratchReg1);
2449  } else {
2450  BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
2451  .addReg(ARM::SP, RegState::Define)
2452  .addReg(ARM::SP)
2453  .add(predOps(ARMCC::AL))
2454  .addReg(ScratchReg0)
2455  .addReg(ScratchReg1);
2456  }
2457 
2458  // Update the CFA offset now that we've popped
2459  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2460  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2461  .addCFIIndex(CFIIndex);
2462 
2463  // Return from this function.
2464  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
2465 
2466  // Restore SR0 and SR1 in case __morestack() was not called.
2467  // pop {SR0, SR1}
2468  if (Thumb) {
2469  BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
2470  .add(predOps(ARMCC::AL))
2471  .addReg(ScratchReg0)
2472  .addReg(ScratchReg1);
2473  } else {
2474  BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
2475  .addReg(ARM::SP, RegState::Define)
2476  .addReg(ARM::SP)
2477  .add(predOps(ARMCC::AL))
2478  .addReg(ScratchReg0)
2479  .addReg(ScratchReg1);
2480  }
2481 
2482  // Update the CFA offset now that we've popped
2483  CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
2484  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2485  .addCFIIndex(CFIIndex);
2486 
2487  // Tell debuggers that r4 and r5 are now the same as they were in the
2488  // previous function, that they're the "Same Value".
2489  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
2490  nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
2491  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2492  .addCFIIndex(CFIIndex);
2493  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
2494  nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
2495  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
2496  .addCFIIndex(CFIIndex);
2497 
2498  // Organize the MBB successor lists.
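  // Illustrative summary (not from the LLVM sources) of the control flow that
  // the addSuccessor calls below establish:
  //
  //   PrevStackMBB -> McrMBB -> GetMBB ---> PostStackMBB -> PrologueMBB
  //                                  \            ^
  //                                   +-> AllocMBB
  //
  // GetMBB branches to PostStackMBB when there is enough stack; otherwise it
  // falls through to AllocMBB, which calls __morestack.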
2499  PostStackMBB->addSuccessor(&PrologueMBB);
2500 
2501  AllocMBB->addSuccessor(PostStackMBB);
2502 
2503  GetMBB->addSuccessor(PostStackMBB);
2504  GetMBB->addSuccessor(AllocMBB);
2505 
2506  McrMBB->addSuccessor(GetMBB);
2507 
2508  PrevStackMBB->addSuccessor(McrMBB);
2509 
2510 #ifdef EXPENSIVE_CHECKS
2511  MF.verify();
2512 #endif
2513 }