#define DEBUG_TYPE "x86-avoid-SFB"

static cl::opt<unsigned> X86AvoidSFBInspectionLimit(
    "x86-sfb-inspection-limit",
    cl::desc("X86: Number of instructions backward to "
             "inspect for store forwarding blocks."),
    cl::init(20), cl::Hidden);
using DisplacementSizeMap = std::map<int64_t, unsigned>;

class X86AvoidSFBPass : public MachineFunctionPass {
public:
  StringRef getPassName() const override {
    return "X86 Avoid Store Forwarding Blocks";
  }

private:
  SmallVector<std::pair<MachineInstr *, MachineInstr *>, 2>
      BlockedLoadsStoresPairs;

  void breakBlockedCopies(MachineInstr *LoadInst, MachineInstr *StoreInst,
                          const DisplacementSizeMap &BlockingStoresDispSizeMap);
  void buildCopies(int Size, MachineInstr *LoadInst, int64_t LdDispImm,
                   MachineInstr *StoreInst, int64_t StDispImm,
                   int64_t LMMOffset, int64_t SMMOffset);
  void buildCopy(MachineInstr *LoadInst, unsigned NLoadOpcode,
                 int64_t LoadDisp, MachineInstr *StoreInst,
                 unsigned NStoreOpcode, int64_t StoreDisp, unsigned Size,
                 int64_t LMMOffset, int64_t SMMOffset);
};
FunctionPass *llvm::createX86AvoidStoreForwardingBlocks() {
  return new X86AvoidSFBPass();
}

static bool isXMMLoadOpcode(unsigned Opcode) {
  return Opcode == X86::MOVUPSrm || Opcode == X86::MOVAPSrm ||
         Opcode == X86::VMOVUPSrm || Opcode == X86::VMOVAPSrm ||
         Opcode == X86::VMOVUPDrm || Opcode == X86::VMOVAPDrm ||
         Opcode == X86::VMOVDQUrm || Opcode == X86::VMOVDQArm ||
         Opcode == X86::VMOVUPSZ128rm || Opcode == X86::VMOVAPSZ128rm ||
         Opcode == X86::VMOVUPDZ128rm || Opcode == X86::VMOVAPDZ128rm ||
         Opcode == X86::VMOVDQU64Z128rm || Opcode == X86::VMOVDQA64Z128rm ||
         Opcode == X86::VMOVDQU32Z128rm || Opcode == X86::VMOVDQA32Z128rm;
}

static bool isYMMLoadOpcode(unsigned Opcode) {
  return Opcode == X86::VMOVUPSYrm || Opcode == X86::VMOVAPSYrm ||
         Opcode == X86::VMOVUPDYrm || Opcode == X86::VMOVAPDYrm ||
         Opcode == X86::VMOVDQUYrm || Opcode == X86::VMOVDQAYrm ||
         Opcode == X86::VMOVUPSZ256rm || Opcode == X86::VMOVAPSZ256rm ||
         Opcode == X86::VMOVUPDZ256rm || Opcode == X86::VMOVAPDZ256rm ||
         Opcode == X86::VMOVDQU64Z256rm || Opcode == X86::VMOVDQA64Z256rm ||
         Opcode == X86::VMOVDQU32Z256rm || Opcode == X86::VMOVDQA32Z256rm;
}
static bool isPotentialBlockedMemCpyPair(int LdOpcode, int StOpcode) {
  switch (LdOpcode) {
  case X86::MOVUPSrm:
  case X86::MOVAPSrm:
    return StOpcode == X86::MOVUPSmr || StOpcode == X86::MOVAPSmr;
  case X86::VMOVUPSrm:
  case X86::VMOVAPSrm:
    return StOpcode == X86::VMOVUPSmr || StOpcode == X86::VMOVAPSmr;
  case X86::VMOVUPDrm:
  case X86::VMOVAPDrm:
    return StOpcode == X86::VMOVUPDmr || StOpcode == X86::VMOVAPDmr;
  case X86::VMOVDQUrm:
  case X86::VMOVDQArm:
    return StOpcode == X86::VMOVDQUmr || StOpcode == X86::VMOVDQAmr;
  case X86::VMOVUPSZ128rm:
  case X86::VMOVAPSZ128rm:
    return StOpcode == X86::VMOVUPSZ128mr || StOpcode == X86::VMOVAPSZ128mr;
  case X86::VMOVUPDZ128rm:
  case X86::VMOVAPDZ128rm:
    return StOpcode == X86::VMOVUPDZ128mr || StOpcode == X86::VMOVAPDZ128mr;
  case X86::VMOVUPSYrm:
  case X86::VMOVAPSYrm:
    return StOpcode == X86::VMOVUPSYmr || StOpcode == X86::VMOVAPSYmr;
  case X86::VMOVUPDYrm:
  case X86::VMOVAPDYrm:
    return StOpcode == X86::VMOVUPDYmr || StOpcode == X86::VMOVAPDYmr;
  case X86::VMOVDQUYrm:
  case X86::VMOVDQAYrm:
    return StOpcode == X86::VMOVDQUYmr || StOpcode == X86::VMOVDQAYmr;
  case X86::VMOVUPSZ256rm:
  case X86::VMOVAPSZ256rm:
    return StOpcode == X86::VMOVUPSZ256mr || StOpcode == X86::VMOVAPSZ256mr;
  case X86::VMOVUPDZ256rm:
  case X86::VMOVAPDZ256rm:
    return StOpcode == X86::VMOVUPDZ256mr || StOpcode == X86::VMOVAPDZ256mr;
  case X86::VMOVDQU64Z128rm:
  case X86::VMOVDQA64Z128rm:
    return StOpcode == X86::VMOVDQU64Z128mr || StOpcode == X86::VMOVDQA64Z128mr;
  case X86::VMOVDQU32Z128rm:
  case X86::VMOVDQA32Z128rm:
    return StOpcode == X86::VMOVDQU32Z128mr || StOpcode == X86::VMOVDQA32Z128mr;
  case X86::VMOVDQU64Z256rm:
  case X86::VMOVDQA64Z256rm:
    return StOpcode == X86::VMOVDQU64Z256mr || StOpcode == X86::VMOVDQA64Z256mr;
  case X86::VMOVDQU32Z256rm:
  case X86::VMOVDQA32Z256rm:
    return StOpcode == X86::VMOVDQU32Z256mr || StOpcode == X86::VMOVDQA32Z256mr;
  default:
    return false;
  }
}
static bool isPotentialBlockingStoreInst(int Opcode, int LoadOpcode) {
  bool PBlock = false;
  // Narrow scalar stores can block any of the vector loads handled here.
  PBlock |= Opcode == X86::MOV64mr || Opcode == X86::MOV64mi32 ||
            Opcode == X86::MOV32mr || Opcode == X86::MOV32mi ||
            Opcode == X86::MOV16mr || Opcode == X86::MOV16mi ||
            Opcode == X86::MOV8mr || Opcode == X86::MOV8mi;
  // 128-bit stores only block the wider (YMM) loads.
  if (isYMMLoadOpcode(LoadOpcode))
    PBlock |= Opcode == X86::VMOVUPSmr || Opcode == X86::VMOVAPSmr ||
              Opcode == X86::VMOVUPDmr || Opcode == X86::VMOVAPDmr ||
              Opcode == X86::VMOVDQUmr || Opcode == X86::VMOVDQAmr ||
              Opcode == X86::VMOVUPSZ128mr || Opcode == X86::VMOVAPSZ128mr ||
              Opcode == X86::VMOVUPDZ128mr || Opcode == X86::VMOVAPDZ128mr ||
              Opcode == X86::VMOVDQU64Z128mr ||
              Opcode == X86::VMOVDQA64Z128mr ||
              Opcode == X86::VMOVDQU32Z128mr || Opcode == X86::VMOVDQA32Z128mr;
  return PBlock;
}
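// Example of the pattern this predicate targets: an 8-byte MOV64mr that writes
// into the middle of a region later read back by a 32-byte VMOVUPSYrm is a
// potential blocker, because the narrower store cannot be forwarded to the
// wider load.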
static unsigned getYMMtoXMMLoadOpcode(unsigned LoadOpcode) {
  switch (LoadOpcode) {
  case X86::VMOVUPSYrm:
  case X86::VMOVAPSYrm:
    return X86::VMOVUPSrm;
  case X86::VMOVUPDYrm:
  case X86::VMOVAPDYrm:
    return X86::VMOVUPDrm;
  case X86::VMOVDQUYrm:
  case X86::VMOVDQAYrm:
    return X86::VMOVDQUrm;
  case X86::VMOVUPSZ256rm:
  case X86::VMOVAPSZ256rm:
    return X86::VMOVUPSZ128rm;
  case X86::VMOVUPDZ256rm:
  case X86::VMOVAPDZ256rm:
    return X86::VMOVUPDZ128rm;
  case X86::VMOVDQU64Z256rm:
  case X86::VMOVDQA64Z256rm:
    return X86::VMOVDQU64Z128rm;
  case X86::VMOVDQU32Z256rm:
  case X86::VMOVDQA32Z256rm:
    return X86::VMOVDQU32Z128rm;
  default:
    llvm_unreachable("Unexpected Load type");
  }
}
static unsigned getYMMtoXMMStoreOpcode(unsigned StoreOpcode) {
  switch (StoreOpcode) {
  case X86::VMOVUPSYmr:
  case X86::VMOVAPSYmr:
    return X86::VMOVUPSmr;
  case X86::VMOVUPDYmr:
  case X86::VMOVAPDYmr:
    return X86::VMOVUPDmr;
  case X86::VMOVDQUYmr:
  case X86::VMOVDQAYmr:
    return X86::VMOVDQUmr;
  case X86::VMOVUPSZ256mr:
  case X86::VMOVAPSZ256mr:
    return X86::VMOVUPSZ128mr;
  case X86::VMOVUPDZ256mr:
  case X86::VMOVAPDZ256mr:
    return X86::VMOVUPDZ128mr;
  case X86::VMOVDQU64Z256mr:
  case X86::VMOVDQA64Z256mr:
    return X86::VMOVDQU64Z128mr;
  case X86::VMOVDQU32Z256mr:
  case X86::VMOVDQA32Z256mr:
    return X86::VMOVDQU32Z128mr;
  default:
    llvm_unreachable("Unexpected Store type");
  }
}
static int getAddrOffset(MachineInstr *MI) {
  const MCInstrDesc &Descl = MI->getDesc();
  int AddrOffset = X86II::getMemoryOperandNo(Descl.TSFlags);
  assert(AddrOffset != -1 && "Expected Memory Operand");
  AddrOffset += X86II::getOperandBias(Descl);
  return AddrOffset;
}

// Relevant addressing modes contain only a base register (or frame index) and
// an immediate displacement; anything with an index or segment register is
// left alone.
static bool isRelevantAddressingMode(MachineInstr *MI) {
  int AddrOffset = getAddrOffset(MI);
  MachineOperand &Base = getBaseOperand(MI);
  MachineOperand &Disp = getDispOperand(MI);
  MachineOperand &Index = MI->getOperand(AddrOffset + X86::AddrIndexReg);
  MachineOperand &Segment = MI->getOperand(AddrOffset + X86::AddrSegmentReg);

  if (!((Base.isReg() && Base.getReg() != X86::NoRegister) || Base.isFI()))
    return false;
  if (!Disp.isImm())
    return false;
  if (!(Index.isReg() && Index.getReg() == X86::NoRegister))
    return false;
  if (!(Segment.isReg() && Segment.getReg() == X86::NoRegister))
    return false;
  return true;
}
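// Example: a load such as "movups (%rdi), %xmm0" or a frame-index access like
// 16(%rsp) qualifies, while "(%rdi,%rcx,4)" is rejected because it uses an
// index register.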
static SmallVector<MachineInstr *, 2>
findPotentialBlockers(MachineInstr *LoadInst) {
  SmallVector<MachineInstr *, 2> PotentialBlockers;
  unsigned BlockCount = 0;
  const unsigned InspectionLimit = X86AvoidSFBInspectionLimit;
  // Walk backwards from the load within its own block; give up at a call.
  for (auto PBInst = std::next(MachineBasicBlock::reverse_iterator(LoadInst)),
            E = LoadInst->getParent()->rend();
       PBInst != E; ++PBInst) {
    if (++BlockCount >= InspectionLimit)
      break;
    if (PBInst->getDesc().isCall())
      return PotentialBlockers;
    PotentialBlockers.push_back(&*PBInst);
  }
  // If the limit was not reached, also scan each first-order predecessor block
  // with the remaining budget.
  if (BlockCount < InspectionLimit) {
    int LimitLeft = InspectionLimit - BlockCount;
    for (MachineBasicBlock *PMBB : LoadInst->getParent()->predecessors()) {
      int PredCount = 0;
      for (auto PBInst = PMBB->rbegin(), PME = PMBB->rend();
           PBInst != PME; ++PBInst) {
        if (++PredCount >= LimitLeft)
          break;
        if (PBInst->getDesc().isCall())
          break;
        PotentialBlockers.push_back(&*PBInst);
      }
    }
  }
  return PotentialBlockers;
}
void X86AvoidSFBPass::buildCopy(MachineInstr *LoadInst, unsigned NLoadOpcode,
                                int64_t LoadDisp, MachineInstr *StoreInst,
                                unsigned NStoreOpcode, int64_t StoreDisp,
                                unsigned Size, int64_t LMMOffset,
                                int64_t SMMOffset) {
  MachineOperand &LoadBase = getBaseOperand(LoadInst);
  MachineOperand &StoreBase = getBaseOperand(StoreInst);
  MachineBasicBlock *MBB = LoadInst->getParent();
  // Emit the narrower load into a fresh virtual register. The added operands
  // follow the X86 memory-operand layout: base, scale, index, disp, segment.
  unsigned Reg1 = MRI->createVirtualRegister(
      TII->getRegClass(TII->get(NLoadOpcode), 0, TRI, *(MBB->getParent())));
  MachineInstr *NewLoad =
      BuildMI(*MBB, LoadInst, LoadInst->getDebugLoc(), TII->get(NLoadOpcode),
              Reg1)
          .add(LoadBase)
          .addImm(1)
          .addReg(X86::NoRegister)
          .addImm(LoadDisp)
          .addReg(X86::NoRegister);
  if (LoadBase.isReg())
    getBaseOperand(NewLoad).setIsKill(false);
  // If the load and store are consecutive, insert the new store at the load's
  // location to reduce register pressure.
  MachineInstr *StInst = StoreInst;
  if (StoreInst->getPrevNode() == LoadInst)
    StInst = LoadInst;
  MachineInstr *NewStore =
      BuildMI(*MBB, StInst, StInst->getDebugLoc(), TII->get(NStoreOpcode))
          .add(StoreBase)
          .addImm(1)
          .addReg(X86::NoRegister)
          .addImm(StoreDisp)
          .addReg(X86::NoRegister)
          .addReg(Reg1);
  if (StoreBase.isReg())
    getBaseOperand(NewStore).setIsKill(false);
  MachineOperand &StoreSrcVReg = StoreInst->getOperand(X86::AddrNumOperands);
  assert(StoreSrcVReg.isReg() && "Expected virtual register");
  NewStore->getOperand(X86::AddrNumOperands).setIsKill(StoreSrcVReg.isKill());
}
void X86AvoidSFBPass::buildCopies(int Size, MachineInstr *LoadInst,
                                  int64_t LdDispImm, MachineInstr *StoreInst,
                                  int64_t StDispImm, int64_t LMMOffset,
                                  int64_t SMMOffset) {
  int LdDisp = LdDispImm;
  int StDisp = StDispImm;
  while (Size > 0) {
    // Consume the region greedily, largest legal copy first. After each copy
    // LdDisp, StDisp, LMMOffset and SMMOffset advance by the emitted size.
    if ((Size - MOV128SZ >= 0) && isYMMLoadOpcode(LoadInst->getOpcode())) {
      Size = Size - MOV128SZ;
      buildCopy(LoadInst, getYMMtoXMMLoadOpcode(LoadInst->getOpcode()), LdDisp,
                StoreInst, getYMMtoXMMStoreOpcode(StoreInst->getOpcode()),
                StDisp, MOV128SZ, LMMOffset, SMMOffset);
      // ...
      continue;
    }
    if (Size - MOV64SZ >= 0) {
      Size = Size - MOV64SZ;
      buildCopy(LoadInst, X86::MOV64rm, LdDisp, StoreInst, X86::MOV64mr, StDisp,
                MOV64SZ, LMMOffset, SMMOffset);
      // ...
      continue;
    }
    if (Size - MOV32SZ >= 0) {
      Size = Size - MOV32SZ;
      buildCopy(LoadInst, X86::MOV32rm, LdDisp, StoreInst, X86::MOV32mr, StDisp,
                MOV32SZ, LMMOffset, SMMOffset);
      // ...
      continue;
    }
    if (Size - MOV16SZ >= 0) {
      Size = Size - MOV16SZ;
      buildCopy(LoadInst, X86::MOV16rm, LdDisp, StoreInst, X86::MOV16mr, StDisp,
                MOV16SZ, LMMOffset, SMMOffset);
      // ...
      continue;
    }
    if (Size - MOV8SZ >= 0) {
      Size = Size - MOV8SZ;
      buildCopy(LoadInst, X86::MOV8rm, LdDisp, StoreInst, X86::MOV8mr, StDisp,
                MOV8SZ, LMMOffset, SMMOffset);
      // ...
      continue;
    }
  }
  assert(Size == 0 && "Wrong size division");
}
static void updateKillStatus(MachineInstr *LoadInst, MachineInstr *StoreInst) {
  MachineOperand &LoadBase = getBaseOperand(LoadInst);
  MachineOperand &StoreBase = getBaseOperand(StoreInst);
  if (LoadBase.isReg()) {
    // Transfer the kill flag of the original load's base register to the last
    // newly built load that uses it.
    // ...
  }
  if (StoreBase.isReg()) {
    // Likewise for the store's base register.
    // ...
  }
}

void X86AvoidSFBPass::findPotentiallylBlockedCopies(MachineFunction &MF) {
  for (auto &MBB : MF)
    for (auto &MI : MBB) {
      if (!isPotentialBlockedMemCpyLd(MI.getOpcode()))
        continue;
      int DefVR = MI.getOperand(0).getReg();
      if (!MRI->hasOneUse(DefVR))
        continue;
      for (auto UI = MRI->use_nodbg_begin(DefVR), UE = MRI->use_nodbg_end();
           UI != UE;) {
        MachineOperand &StoreMO = *UI++;
        MachineInstr &StoreMI = *StoreMO.getParent();
        // ...
        assert(MI.hasOneMemOperand() &&
               "Expected one memory operand for load instruction");
        assert(StoreMI.hasOneMemOperand() &&
               "Expected one memory operand for store instruction");
        BlockedLoadsStoresPairs.push_back(std::make_pair(&MI, &StoreMI));
      }
    }
}
unsigned X86AvoidSFBPass::getRegSizeInBytes(MachineInstr *LoadInst) {
  const auto *TRC = TII->getRegClass(TII->get(LoadInst->getOpcode()), 0, TRI,
                                     *LoadInst->getParent()->getParent());
  return TRI->getRegSizeInBits(*TRC) / 8;
}
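// For the loads this pass rewrites, this is 16 bytes for XMM loads and
// 32 bytes for YMM (256-bit) loads.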
void X86AvoidSFBPass::breakBlockedCopies(
    MachineInstr *LoadInst, MachineInstr *StoreInst,
    const DisplacementSizeMap &BlockingStoresDispSizeMap) {
  int64_t LdDispImm = getDispOperand(LoadInst).getImm();
  int64_t StDispImm = getDispOperand(StoreInst).getImm();
  int64_t LMMOffset = 0;
  int64_t SMMOffset = 0;

  int64_t LdDisp1 = LdDispImm;
  int64_t LdDisp2 = 0;
  int64_t StDisp1 = StDispImm;
  int64_t StDisp2 = 0;
  unsigned Size1 = 0;
  unsigned Size2 = 0;
  int64_t LdStDelta = StDispImm - LdDispImm;

  for (auto DispSizePair : BlockingStoresDispSizeMap) {
    LdDisp2 = DispSizePair.first;
    StDisp2 = DispSizePair.first + LdStDelta;
    Size2 = DispSizePair.second;
    // Avoid copying overlapping areas.
    if (LdDisp2 < LdDisp1) {
      int OverlapDelta = LdDisp1 - LdDisp2;
      LdDisp2 += OverlapDelta;
      StDisp2 += OverlapDelta;
      Size2 -= OverlapDelta;
    }
    Size1 = LdDisp2 - LdDisp1;

    // Copy the region up to the current blocking store, then the blocked
    // region itself.
    buildCopies(Size1, LoadInst, LdDisp1, StoreInst, StDisp1, LMMOffset,
                SMMOffset);
    buildCopies(Size2, LoadInst, LdDisp2, StoreInst, StDisp2, LMMOffset + Size1,
                SMMOffset + Size1);
    LdDisp1 = LdDisp2 + Size2;
    StDisp1 = StDisp2 + Size2;
    LMMOffset += Size1 + Size2;
    SMMOffset += Size1 + Size2;
  }
  // Copy whatever remains after the last blocking store.
  unsigned Size3 = (LdDispImm + getRegSizeInBytes(LoadInst)) - LdDisp1;
  buildCopies(Size3, LoadInst, LdDisp1, StoreInst, StDisp1, LMMOffset,
              LMMOffset);
}
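// Worked example (illustrative numbers): a 32-byte YMM load at displacement 0
// blocked by a single 4-byte store at displacement 8 is rebuilt as three
// copies covering bytes [0,8), [8,12) and [12,32), so the blocking store's
// bytes are reloaded by a copy of exactly its own size.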
static bool hasSameBaseOpValue(MachineInstr *LoadInst,
                               MachineInstr *StoreInst) {
  MachineOperand &LoadBase = getBaseOperand(LoadInst);
  MachineOperand &StoreBase = getBaseOperand(StoreInst);
  if (LoadBase.isReg() != StoreBase.isReg())
    return false;
  if (LoadBase.isReg())
    return LoadBase.getReg() == StoreBase.getReg();
  return LoadBase.getIndex() == StoreBase.getIndex();
}

static bool isBlockingStore(int64_t LoadDispImm, unsigned LoadSize,
                            int64_t StoreDispImm, unsigned StoreSize) {
  return ((StoreDispImm >= LoadDispImm) &&
          (StoreDispImm <= LoadDispImm + (LoadSize - StoreSize)));
}
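// Example: a 4-byte store at displacement 8 blocks a 32-byte load at
// displacement 0 because 0 <= 8 <= 0 + (32 - 4); a 4-byte store at
// displacement 30 does not, since it extends past the loaded range.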
static void
updateBlockingStoresDispSizeMap(DisplacementSizeMap &BlockingStoresDispSizeMap,
                                int64_t DispImm, unsigned Size) {
  if (BlockingStoresDispSizeMap.count(DispImm)) {
    // Keep the smallest store seen at this displacement.
    if (BlockingStoresDispSizeMap[DispImm] > Size)
      BlockingStoresDispSizeMap[DispImm] = Size;
  } else
    BlockingStoresDispSizeMap[DispImm] = Size;
}
// Remove blocking stores contained in each other.
static void
removeRedundantBlockingStores(DisplacementSizeMap &BlockingStoresDispSizeMap) {
  if (BlockingStoresDispSizeMap.size() <= 1)
    return;

  SmallVector<std::pair<int64_t, unsigned>, 0> DispSizeStack;
  for (auto DispSizePair : BlockingStoresDispSizeMap) {
    int64_t CurrDisp = DispSizePair.first;
    unsigned CurrSize = DispSizePair.second;
    while (DispSizeStack.size()) {
      int64_t PrevDisp = DispSizeStack.back().first;
      unsigned PrevSize = DispSizeStack.back().second;
      // Keep the previous entry only if the current store ends beyond it.
      if (CurrDisp + CurrSize > PrevDisp + PrevSize)
        break;
      DispSizeStack.pop_back();
    }
    DispSizeStack.push_back(DispSizePair);
  }
  BlockingStoresDispSizeMap.clear();
  for (auto Disp : DispSizeStack)
    BlockingStoresDispSizeMap.insert(Disp);
}
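// Example of the pruning above: given {0: 16, 4: 4}, the entry (0, 16) is
// popped when (4, 4) is visited because 4 + 4 <= 0 + 16, leaving only the
// smaller store that is contained inside the larger one.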
bool X86AvoidSFBPass::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  // ... (bail out when the pass is disabled or the target is not 64-bit)
  MRI = &MF.getRegInfo();
  assert(MRI->isSSA() && "Expected MIR to be in SSA form");
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  // Look for a load followed by a store to XMM/YMM that together look like a
  // memcpy.
  findPotentiallylBlockedCopies(MF);

  for (auto LoadStoreInstPair : BlockedLoadsStoresPairs) {
    MachineInstr *LoadInst = LoadStoreInstPair.first;
    DisplacementSizeMap BlockingStoresDispSizeMap;

    SmallVector<MachineInstr *, 2> PotentialBlockers =
        findPotentialBlockers(LoadInst);
    for (auto PBInst : PotentialBlockers) {
      // ... (skip instructions that are not relevant blocking stores)
      assert(PBInst->hasOneMemOperand() && "Expected One Memory Operand");
      unsigned PBstSize = (*PBInst->memoperands_begin())->getSize();
      // ... (record the store in BlockingStoresDispSizeMap when it blocks
      //      the load)
    }

    if (BlockingStoresDispSizeMap.empty())
      continue;

    // A store-forwarding block was found: break the memcpy's load and store
    // into smaller copies so each blocking store is copied separately.
    MachineInstr *StoreInst = LoadStoreInstPair.second;
    removeRedundantBlockingStores(BlockingStoresDispSizeMap);
    breakBlockedCopies(LoadInst, StoreInst, BlockingStoresDispSizeMap);
    updateKillStatus(LoadInst, StoreInst);
    ForRemoval.push_back(LoadInst);
    ForRemoval.push_back(StoreInst);
  }
  for (auto RemovedInst : ForRemoval) {
    RemovedInst->eraseFromParent();
  }
  ForRemoval.clear();
  BlockedLoadsStoresPairs.clear();
  return Changed;
}