#define DEBUG_TYPE "x86-cf-opt"

static cl::opt<bool>
    NoX86CFOpt("no-x86-call-frame-opt",
               cl::desc("Avoid optimizing x86 call frames for size"),
               cl::init(false), cl::Hidden);

class X86CallFrameOptimization : public MachineFunctionPass {
  // Information we track about each call site.
  struct CallContext {
    CallContext() : FrameSetup(nullptr), ArgStoreVector(4, nullptr) {}

    MachineBasicBlock::iterator FrameSetup; // The frame-setup instruction.
    MachineInstr *Call = nullptr;           // The call itself.
    MachineInstr *SPCopy = nullptr;         // A copy of the stack pointer.
    int64_t ExpectedDist = 0;   // Total displacement of all stack parameters.
    SmallVector<MachineInstr *, 4> ArgStoreVector; // Stores passing the args.
    bool NoStackParams = false; // True if no parameters go on the stack.
    bool UsePush = false;       // True if this site can use push instructions.
  };

  typedef SmallVector<CallContext, 8> ContextVector;

  enum InstClassification { Convert, Skip, Exit };

  StringRef getPassName() const override { return "X86 Optimize Call Frame"; }

  unsigned Log2SlotSize; // Log2 of the stack slot size, cached for shifts.
};

INITIALIZE_PASS(X86CallFrameOptimization, DEBUG_TYPE,
                "X86 Call Frame Optimization", false, false)
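// Usage note (an addition, not part of the original source): because
// NoX86CFOpt is a cl::opt, the transformation can be switched off from any
// tool that parses backend options, e.g.:
//   llc -no-x86-call-frame-opt test.ll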
bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
  if (NoX86CFOpt.getValue())
    return false;

  // We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset
  // directives in the compact unwind encoding Darwin uses, so bail out if
  // there is any danger of one being generated.
  if (STI->isTargetDarwin() &&
      (!MF.getLandingPads().empty() ||
       (MF.getFunction().needsUnwindTableEntry() && !TFL->hasFP(MF))))
    return false;

  // It is not valid to change the stack pointer outside the prolog/epilog
  // on 64-bit Windows.
  if (STI->isTargetWin64())
    return false;

  // Check that every call-frame setup is closed by a matching call-frame
  // destroy inside the same basic block, and that no frame sequences nest.
  unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
  unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
  for (MachineBasicBlock &BB : MF) {
    bool InsideFrameSequence = false;
    for (MachineInstr &MI : BB) {
      if (MI.getOpcode() == FrameSetupOpcode) {
        if (InsideFrameSequence)
          return false;
        InsideFrameSequence = true;
      } else if (MI.getOpcode() == FrameDestroyOpcode) {
        if (!InsideFrameSequence)
          return false;
        InsideFrameSequence = false;
      }
    }

    if (InsideFrameSequence)
      return false;
  }

  return true;
}
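// Illustrative sketch (added here; the MIR shapes are schematic, not taken
// from the source): the per-block scan above rejects nested frame sequences,
//   ADJCALLSTACKDOWN32 16, ...
//   ADJCALLSTACKDOWN32 8, ...   ; setup while InsideFrameSequence is already
//                               ; true -> isLegal returns false
// and any block that ends with InsideFrameSequence still true, i.e. whose
// matching ADJCALLSTACKUP32 lives in a different basic block.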
bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
                                            ContextVector &CallSeqVector) {
  // With variable-sized objects the frame cannot be reserved anyway, so the
  // transformation is always a win; otherwise use a code-size heuristic.
  if (MF.getFrameInfo().hasVarSizedObjects())
    return true;

  unsigned StackAlign = TFL->getStackAlignment();
  int64_t Advantage = 0;
  for (const auto &CC : CallSeqVector) {
    if (CC.NoStackParams) // No stack parameters, no adjustment needed.
      continue;
    if (!CC.UsePush) {
      // Without pushes this site pays for the unreserved frame with a
      // sub/add esp pair, roughly 3 bytes each.
      Advantage -= 6;
    } else {
      Advantage -= 3; // An add is needed after the call (~3 bytes)...
      if (CC.ExpectedDist % StackAlign)
        Advantage -= 3; // ...plus a sub beforehand if the stack is realigned.
      // Each push saves roughly 3 bytes over the mov it replaces.
      Advantage += (CC.ExpectedDist >> Log2SlotSize) * 3;
    }
  }
  return Advantage >= 0;
}
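// Worked example (added for illustration; assumes a 32-bit target with
// SlotSize = 4 and StackAlign = 16): a pushable call site passing three
// stack arguments has ExpectedDist = 12, so
//   Advantage = -3 (add after call) - 3 (12 % 16 != 0, realignment sub)
//             + (12 >> 2) * 3 = -6 + 9 = +3 >= 0  -> profitable.
// A single argument (ExpectedDist = 4) gives -6 + 3 = -3 < 0, so the
// mov-based sequence is kept.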
bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<X86Subtarget>();
  TII = STI->getInstrInfo();
  TFL = STI->getFrameLowering();
  MRI = &MF.getRegInfo();

  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());
  SlotSize = RegInfo.getSlotSize();
  assert(isPowerOf2_32(SlotSize) && "Expect power of 2 stack slot size");
  Log2SlotSize = Log2_32(SlotSize);

  if (skipFunction(MF.getFunction()) || !isLegal(MF))
    return false;

  unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
  bool Changed = false;
  ContextVector CallSeqVector;

  // Gather a CallContext for every call-frame setup in the function, then
  // decide globally whether the rewrite pays off.
  for (auto &MBB : MF)
    for (auto &MI : MBB)
      if (MI.getOpcode() == FrameSetupOpcode) {
        CallContext Context;
        collectCallInfo(MF, MBB, MI, Context);
        CallSeqVector.push_back(Context);
      }

  if (!isProfitable(MF, CallSeqVector))
    return false;

  for (auto CC : CallSeqVector)
    if (CC.UsePush) {
      adjustCallSequence(MF, CC);
      Changed = true;
    }

  return Changed;
}
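// Hedged sketch (not part of this file): the pass is created through
// createX86CallFrameOptimization() below and added to the X86 codegen
// pipeline, along the lines of
//   void X86PassConfig::addPreRegAlloc() {
//     addPass(createX86CallFrameOptimization());
//   }
// Treat the exact hook and method name as an assumption; see
// X86TargetMachine.cpp for the authoritative wiring.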
X86CallFrameOptimization::InstClassification
X86CallFrameOptimization::classifyInstruction(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const X86RegisterInfo &RegInfo, DenseSet<unsigned int> &UsedRegs) {
  // Movs onto the stack are convertible; and $0 / or $-1 act as compact
  // stores of 0 and -1.
  switch (MI->getOpcode()) {
  case X86::AND16mi8: case X86::AND32mi8: case X86::AND64mi8: {
    MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands);
    return ImmOp.getImm() == 0 ? Convert : Exit;
  }
  case X86::OR16mi8: case X86::OR32mi8: case X86::OR64mi8: {
    MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands);
    return ImmOp.getImm() == -1 ? Convert : Exit;
  }
  case X86::MOV32mi: case X86::MOV32mr:
  case X86::MOV64mi32: case X86::MOV64mr:
    return Convert;
  }

  // Any other call or instruction that may store ends the scan.
  if (MI->isCall() || MI->mayStore())
    return Exit;

  // Bail if the instruction defines a physical register that one of the
  // already-collected argument stores reads.
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    unsigned int Reg = MO.getReg();
    if (!RegInfo.isPhysicalRegister(Reg))
      continue;
    if (MO.isDef())
      for (unsigned int U : UsedRegs)
        if (RegInfo.regsOverlap(Reg, U))
          return Exit;
  }

  return Skip;
}
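// Illustrative note (added): the AND/OR special cases cover the compact
// idioms the compiler emits for storing 0 and -1:
//   andl $0, 8(%esp)    ; leaves the slot holding 0  -> becomes push $0
//   orl $-1, 4(%esp)    ; leaves the slot holding -1 -> becomes push $-1
// Any other immediate turns the instruction into a genuine read-modify-write
// rather than a store, so classifyInstruction returns Exit for it.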
void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator I,
                                               CallContext &Context) {
  // Check that this particular call sequence is amenable to the
  // transformation. We expect to enter at the beginning of a call sequence.
  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());
  assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
  MachineBasicBlock::iterator FrameSetup = I++;
  Context.FrameSetup = FrameSetup;

  // The stack adjustment bounds the number of parameters passed on it.
  unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize;

  // A zero adjustment means no stack parameters.
  if (!MaxAdjust) {
    Context.NoStackParams = true;
    return;
  }

  // Skip over DEBUG_VALUEs and, for globals in PIC mode, LEAs.
  while (I->getOpcode() == X86::LEA32r || I->isDebugInstr())
    ++I;

  // SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual
  // register; if present, track that vreg as the stack pointer and remember
  // the copy so the scan below can skip it. The call is used as the search
  // boundary because isCall() is cheap to check.
  unsigned StackPtr = RegInfo.getStackRegister();
  auto StackPtrCopyInst = MBB.end();
  for (auto J = I; !J->isCall(); ++J)
    if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&
        J->getOperand(1).getReg() == StackPtr) {
      StackPtrCopyInst = J;
      Context.SPCopy = &*J++;
      StackPtr = Context.SPCopy->getOperand(0).getReg();
      break;
    }

  // Scan the setup sequence for stores of the form mov imm/reg, k(StackPtr),
  // recording which stack slot each one fills.
  DenseSet<unsigned int> UsedRegs;
  Context.ArgStoreVector.resize(MaxAdjust, nullptr);

  for (InstClassification Classification = Skip; Classification != Exit; ++I) {
    // The copy of the stack pointer is safe to ignore.
    if (I == StackPtrCopyInst)
      continue;
    Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs);
    if (Classification != Convert)
      continue;

    // (Checks that the address operands really form k(StackPtr) are not
    // reproduced in this excerpt.)
    int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
    assert(StackDisp >= 0 &&
           "Negative stack displacement when passing parameters");

    // We really don't want to consider unaligned stores.
    if (StackDisp & (SlotSize - 1))
      return;
    StackDisp >>= Log2SlotSize;

    assert((size_t)StackDisp < Context.ArgStoreVector.size() &&
           "Function call has more parameters than the stack is adjusted for.");

    // If the same stack slot is filled twice, something's fishy.
    if (Context.ArgStoreVector[StackDisp] != nullptr)
      return;
    Context.ArgStoreVector[StackDisp] = &*I;

    // Remember which physical registers the store reads.
    for (const MachineOperand &MO : I->uses()) {
      if (!MO.isReg())
        continue;
      unsigned int Reg = MO.getReg();
      if (RegInfo.isPhysicalRegister(Reg))
        UsedRegs.insert(Reg);
    }
  }

  // The scan overshoots by one; back up and check that we stopped at a call
  // that is immediately followed by the frame destroy.
  --I;
  if (I == MBB.end() || !I->isCall())
    return;
  Context.Call = &*I;
  if ((++I)->getOpcode() != TII->getCallFrameDestroyOpcode())
    return;

  // Count a contiguous prefix of stores; a gap followed by another store
  // disqualifies the sequence.
  auto MMI = Context.ArgStoreVector.begin(), MME = Context.ArgStoreVector.end();
  for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize)
    if (*MMI == nullptr)
      break;
  if (MMI == Context.ArgStoreVector.begin())
    return; // No stack parameters at all.
  for (; MMI != MME; ++MMI)
    if (*MMI != nullptr)
      return;

  Context.UsePush = true;
}
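// Illustrative example (added; assumes SlotSize = 4 on a 32-bit target, and
// the register names and constants are made up) of a sequence that ends with
// UsePush == true:
//   ADJCALLSTACKDOWN32 8, ...   ; FrameSetup, MaxAdjust = 8 >> 2 = 2
//   movl %eax, 4(%esp)          ; ArgStoreVector[1]
//   movl $42, (%esp)            ; ArgStoreVector[0]
//   calll foo                   ; Context.Call
//   ADJCALLSTACKUP32 8, ...
// The vector is filled contiguously from slot 0, so ExpectedDist = 8.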
void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                  const CallContext &Context) {
  // Don't remove the FrameSetup instruction; adjust its parameters and let
  // PEI finalize the handling.
  MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
  MachineBasicBlock &MBB = *(FrameSetup->getParent());
  TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist);

  DebugLoc DL = FrameSetup->getDebugLoc();
  bool Is64Bit = STI->is64Bit();
  // Replace the stores with pushes in reverse slot order, so that slot 0
  // ends up on top of the stack.
  for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator Store = *Context.ArgStoreVector[Idx];
    MachineOperand PushOp = Store->getOperand(X86::AddrNumOperands);
    MachineBasicBlock::iterator Push = nullptr;
    unsigned PushOpcode;
    switch (Store->getOpcode()) {
    default:
      llvm_unreachable("Unexpected Opcode!");
    case X86::AND16mi8: case X86::AND32mi8: case X86::AND64mi8:
    case X86::OR16mi8: case X86::OR32mi8: case X86::OR64mi8:
    case X86::MOV32mi: case X86::MOV64mi32:
      PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSHi32;
      // Small (8-bit) immediates get a shorter PUSH encoding. isImm() may
      // fail even for a MOVmi, because the operand can also be a symbol.
      if (PushOp.isImm()) {
        int64_t Val = PushOp.getImm();
        if (isInt<8>(Val))
          PushOpcode = Is64Bit ? X86::PUSH64i8 : X86::PUSH32i8;
      }
      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).add(PushOp);
      break;
    case X86::MOV32mr: case X86::MOV64mr: {
      unsigned int Reg = PushOp.getReg();
      // Storing a 32-bit vreg on a 64-bit target: widen it for the PUSH64;
      // the upper 32 bits may stay undef.
      if (Is64Bit && Store->getOpcode() == X86::MOV32mr) {
        unsigned UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass);
        Reg = MRI->createVirtualRegister(&X86::GR64RegClass);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::INSERT_SUBREG), Reg)
            .addReg(UndefReg)
            .add(PushOp)
            .addImm(X86::sub_32bit);
      }
      // If PUSHrmm is not slow on this target, try to fold the defining
      // load directly into the push.
      bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        PushOpcode = Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode));
        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
          Push->addOperand(DefMov->getOperand(i));
        DefMov->eraseFromParent();
      } else {
        PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
                   .addReg(Reg)
                   .getInstr();
      }
      break;
    }
    }

    // With an SP-based CFA, the CFA offset must be adjusted after each push.
    if (!TFL->hasFP(MF))
      TFL->BuildCFI(
          MBB, std::next(Push), DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize));

    MBB.erase(Store);
  }

  // The stack-pointer copy is no longer used in the call sequence; erase it
  // if nothing else uses it either.
  if (Context.SPCopy &&
      MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
    Context.SPCopy->eraseFromParent();

  // Make sure PEI doesn't assume a reserved call frame from here on.
  MF.getInfo<X86MachineFunctionInfo>()->setHasPushSequences(true);
}
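// Before/after sketch of the rewrite (added; operands are illustrative).
// Iterating the stores in reverse slot order turns
//   movl $42, (%esp)
//   movl %eax, 4(%esp)
//   calll foo
// into
//   pushl %eax
//   pushl $42
//   calll foo
// i.e. the highest slot is pushed first, so slot 0 ends up at the new top
// of the stack, just as the movs left it.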
MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
    MachineBasicBlock::iterator FrameSetup, unsigned Reg) {
  // Only fold a virtual register that has exactly one non-debug use.
  if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
      !MRI->hasOneNonDBGUse(Reg))
    return nullptr;

  // The def must be a plain load (MOV32rm/MOV64rm) in the same block, with
  // no load-fold barrier between it and the frame setup.
  MachineInstr &DefMI = *MRI->getVRegDef(Reg);
  if ((DefMI.getOpcode() != X86::MOV32rm &&
       DefMI.getOpcode() != X86::MOV64rm) ||
      DefMI.getParent() != FrameSetup->getParent())
    return nullptr;

  for (MachineBasicBlock::iterator I = DefMI, E = FrameSetup; I != E; ++I)
    if (I->isLoadFoldBarrier())
      return nullptr;

  return &DefMI;
}
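// Illustrative fold (added; register and address are made up): given
//   %vreg = MOV32rm <mem>   ; single non-debug use, same block, and no
//   ...                     ; load-fold barrier before the frame setup
//   PUSH32r %vreg
// adjustCallSequence uses the returned def to emit
//   PUSH32rmm <mem>
// and erases the now-dead load.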
FunctionPass *llvm::createX86CallFrameOptimization() {
  return new X86CallFrameOptimization();
}