// ... (includes elided)

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

namespace {

// ... (enum class SIAtomicScope { NONE, SINGLETHREAD, WAVEFRONT, WORKGROUP,
//      AGENT, SYSTEM }; the SIMemOp and Position enums; and
//      LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE() are elided)

/// The distinct address spaces supported by the AMDGPU target for
/// atomic memory operations. Can be ORed together.
enum class SIAtomicAddrSpace {
  NONE = 0u,
  GLOBAL = 1u << 0,
  LDS = 1u << 1,
  SCRATCH = 1u << 2,
  GDS = 1u << 3,
  OTHER = 1u << 4,

  /// The address spaces that can be accessed by an atomic memory operation.
  ATOMIC = GLOBAL | LDS | SCRATCH | GDS,

  /// All address spaces, including those that cannot be accessed atomically.
  ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,

  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
};
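// Illustration (hypothetical helper, not part of the original file):
// LLVM_MARK_AS_BITMASK_ENUM lets the scoped enum above be combined and
// tested with plain bitwise operators, the idiom used throughout this pass.
static bool exampleTouchesGlobal() {
  SIAtomicAddrSpace AS = SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::LDS;
  return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE; // true
}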
/// Sets named bit \p BitName to "true" if present in instruction \p MI.
/// \returns true if \p MI is modified, false otherwise.
template <uint16_t BitName>
bool enableNamedBit(const MachineBasicBlock::iterator &MI) {
  int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
  if (BitIdx == -1)
    return false;

  MachineOperand &Bit = MI->getOperand(BitIdx);
  if (Bit.getImm() != 0)
    return false;

  Bit.setImm(1);
  return true;
}
class SIMemOpInfo final {
private:
  friend class SIMemOpAccess;

  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicScope Scope = SIAtomicScope::SYSTEM;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  bool IsNonTemporal = false;

  SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
              SIAtomicScope Scope = SIAtomicScope::SYSTEM,
              SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
              SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
              bool IsCrossAddressSpaceOrdering = true,
              AtomicOrdering FailureOrdering =
                  AtomicOrdering::SequentiallyConsistent,
              bool IsNonTemporal = false)
      : Ordering(Ordering), FailureOrdering(FailureOrdering),
        Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
        InstrAddrSpace(InstrAddrSpace),
        IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
        IsNonTemporal(IsNonTemporal) {
    // No cross address space ordering is needed if the ordering address
    // space is the same as the instruction address space and contains
    // only a single address space (i.e. is a power of two).
    if ((OrderingAddrSpace == InstrAddrSpace) &&
        isPowerOf2_32(uint32_t(InstrAddrSpace)))
      IsCrossAddressSpaceOrdering = false;
  }
public:
  /// \returns Atomic synchronization scope of the machine instruction used
  /// to create this SIMemOpInfo.
  SIAtomicScope getScope() const {
    return Scope;
  }

  /// \returns Ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getOrdering() const {
    return Ordering;
  }

  /// \returns Failure ordering constraint of the machine instruction used
  /// to create this SIMemOpInfo.
  AtomicOrdering getFailureOrdering() const {
    return FailureOrdering;
  }

  /// \returns The address spaces accessed by the machine instruction used
  /// to create this SIMemOpInfo.
  SIAtomicAddrSpace getInstrAddrSpace() const {
    return InstrAddrSpace;
  }

  /// \returns The address spaces the ordering constraint applies to.
  SIAtomicAddrSpace getOrderingAddrSpace() const {
    return OrderingAddrSpace;
  }

  /// \returns True if ordering across address spaces is required.
  bool getIsCrossAddressSpaceOrdering() const {
    return IsCrossAddressSpaceOrdering;
  }

  /// \returns True if the memory access is marked nontemporal.
  bool isNonTemporal() const {
    return IsNonTemporal;
  }

  /// \returns True if the memory access is atomic.
  bool isAtomic() const {
    return Ordering != AtomicOrdering::NotAtomic;
  }
};
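// Illustration (hypothetical helper; SIMemOpInfo's constructor is private,
// with SIMemOpAccess as a friend, so this only mirrors the constructor's
// check): a single, identical ordering and instruction address space never
// needs cross-address-space ordering, e.g. an LDS-only fence.
static bool exampleNeedsCrossAddrSpaceOrdering(SIAtomicAddrSpace OrderingAS,
                                               SIAtomicAddrSpace InstrAS) {
  return !((OrderingAS == InstrAS) && isPowerOf2_32(uint32_t(InstrAS)));
}
// exampleNeedsCrossAddrSpaceOrdering(SIAtomicAddrSpace::LDS,
//                                    SIAtomicAddrSpace::LDS) == false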
/// Translates machine instructions and their memory operands into the
/// pass's memory-model vocabulary (scope, ordering, address spaces).
class SIMemOpAccess final {
private:
  AMDGPUMachineModuleInfo *MMI = nullptr;

  /// Reports unsupported message \p Msg for \p MI to LLVM context.
  void reportUnsupported(const MachineBasicBlock::iterator &MI,
                         const char *Msg) const;

  // ... (declarations of toSIAtomicScope, toSIAtomicAddrSpace and
  //      constructFromMIWithMMO, defined out of line below)

public:
  /// Constructs a SIMemOpAccess for the machine function \p MF.
  SIMemOpAccess(MachineFunction &MF);

  // ... (declarations of getLoadInfo, getStoreInfo, getAtomicFenceInfo and
  //      getAtomicCmpxchgOrRmwInfo, defined out of line below)
};
/// Per-generation knowledge of how to bypass caches, invalidate them, and
/// insert the waits required by the memory model.
class SICacheControl {
protected:
  /// Instruction info.
  const SIInstrInfo *TII = nullptr;

  IsaVersion IV;

  SICacheControl(const GCNSubtarget &ST);

public:
  /// Creates the cache control for the subtarget \p ST.
  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);

  // ... (pure virtual hooks enableLoadCacheBypass, enableNonTemporal and
  //      insertCacheInvalidate are elided)

  /// Inserts any waits needed before or after \p MI to honor \p Op at
  /// \p Scope for \p AddrSpace.
  virtual bool insertWait(MachineBasicBlock::iterator &MI,
                          SIAtomicScope Scope,
                          SIAtomicAddrSpace AddrSpace,
                          SIMemOp Op,
                          bool IsCrossAddrSpaceOrdering,
                          Position Pos) const = 0;

  virtual ~SICacheControl() = default;
};
/// Cache control for SI (gfx6), which also serves as the baseline for
/// later generations.
class SIGfx6CacheControl : public SICacheControl {
protected:
  /// Sets the GLC bit to "true" if present in \p MI.
  bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit<AMDGPU::OpName::glc>(MI);
  }

  /// Sets the SLC bit to "true" if present in \p MI.
  bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit<AMDGPU::OpName::slc>(MI);
  }

public:
  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}

  // ... (enableLoadCacheBypass, enableNonTemporal and insertCacheInvalidate
  //      overrides elided)

  bool insertWait(MachineBasicBlock::iterator &MI,
                  SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace,
                  SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering,
                  Position Pos) const override;
};
/// Cache control for CI (gfx7) and later: identical to gfx6 except for the
/// cache invalidate instruction it emits.
class SIGfx7CacheControl : public SIGfx6CacheControl {
public:
  SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {}

  bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace,
                             Position Pos) const override;
};
class SIMemoryLegalizer final : public MachineFunctionPass {
private:
  /// Cache Control.
  std::unique_ptr<SICacheControl> CC = nullptr;

  /// List of atomic pseudo instructions to be erased after expansion.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// Removes all processed atomic pseudo instructions from the current
  /// function. \returns true if the function was modified.
  bool removeAtomicPseudoMIs();

  /// Expands load operation \p MI using the memory-model info \p MOI.
  bool expandLoad(const SIMemOpInfo &MOI,
                  MachineBasicBlock::iterator &MI);

  /// Expands store operation \p MI.
  bool expandStore(const SIMemOpInfo &MOI,
                   MachineBasicBlock::iterator &MI);

  /// Expands atomic fence operation \p MI.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);

  /// Expands atomic cmpxchg or rmw operation \p MI.
  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end anonymous namespace
void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
                                      const char *Msg) const {
  const Function &Func = MI->getParent()->getParent()->getFunction();
  DiagnosticInfoUnsupported Diag(Func, Msg, MI->getDebugLoc());
  Func.getContext().diagnose(Diag);
}
Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                               SIAtomicAddrSpace InstrScope) const {
  if (SSID == SyncScope::System)
    return std::make_tuple(SIAtomicScope::SYSTEM,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           true);
  if (SSID == MMI->getAgentSSID())
    return std::make_tuple(SIAtomicScope::AGENT,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           true);
  if (SSID == MMI->getWorkgroupSSID())
    return std::make_tuple(SIAtomicScope::WORKGROUP,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           true);
  if (SSID == MMI->getWavefrontSSID())
    return std::make_tuple(SIAtomicScope::WAVEFRONT,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           true);
  if (SSID == SyncScope::SingleThread)
    return std::make_tuple(SIAtomicScope::SINGLETHREAD,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           true);

  return None;
}
SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
  // ... (the FLAT_ADDRESS and GLOBAL_ADDRESS cases are elided)
  if (AS == AMDGPUAS::LOCAL_ADDRESS)
    return SIAtomicAddrSpace::LDS;
  if (AS == AMDGPUAS::PRIVATE_ADDRESS)
    return SIAtomicAddrSpace::SCRATCH;
  if (AS == AMDGPUAS::REGION_ADDRESS)
    return SIAtomicAddrSpace::GDS;

  return SIAtomicAddrSpace::OTHER;
}
Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getNumMemOperands() > 0);

  SyncScope::ID SSID = SyncScope::SingleThread;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsNonTemporal = true;

  // Combine the information from all the machine memory operands: widen the
  // sync scope and orderings to the strongest, and union the address spaces.
  for (const auto &MMO : MI->memoperands()) {
    IsNonTemporal &= MMO->isNonTemporal();
    InstrAddrSpace |=
      toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
    AtomicOrdering OpOrdering = MMO->getOrdering();
    if (OpOrdering != AtomicOrdering::NotAtomic) {
      const auto &IsSyncScopeInclusion =
        MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
      if (!IsSyncScopeInclusion) {
        reportUnsupported(MI,
          "Unsupported non-inclusive atomic synchronization scope");
        return None;
      }

      SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
      Ordering =
        isStrongerThan(Ordering, OpOrdering) ?
          Ordering : MMO->getOrdering();
      FailureOrdering =
        isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
          FailureOrdering : MMO->getFailureOrdering();
    }
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  if (Ordering != AtomicOrdering::NotAtomic) {
    auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
    if (!ScopeOrNone) {
      reportUnsupported(MI, "Unsupported atomic synchronization scope");
      return None;
    }
    std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
      ScopeOrNone.getValue();
    if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
        ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
      reportUnsupported(MI, "Unsupported atomic address space");
      return None;
    }
  }

  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
                     IsCrossAddressSpaceOrdering, FailureOrdering,
                     IsNonTemporal);
}
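// Aside (hypothetical helper, illustrative only): isStrongerThan compares
// atomic orderings on the C++ lattice, so the reduction above converges to
// the strongest ordering seen across all memory operands.
static AtomicOrdering exampleMergeOrderings() {
  AtomicOrdering Merged = AtomicOrdering::NotAtomic;
  for (AtomicOrdering AO : {AtomicOrdering::Monotonic, AtomicOrdering::Acquire})
    Merged = isStrongerThan(Merged, AO) ? Merged : AO;
  return Merged; // AtomicOrdering::Acquire, the strongest seen
}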
Optional<SIMemOpInfo> SIMemOpAccess::getLoadInfo(
    const MachineBasicBlock::iterator &MI) const {
  if (!(MI->mayLoad() && !MI->mayStore()))
    return None;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}
Optional<SIMemOpInfo> SIMemOpAccess::getStoreInfo(
    const MachineBasicBlock::iterator &MI) const {
  if (!(!MI->mayLoad() && MI->mayStore()))
    return None;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}
Optional<SIMemOpInfo> SIMemOpAccess::getAtomicFenceInfo(
    const MachineBasicBlock::iterator &MI) const {
  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return None;

  AtomicOrdering Ordering =
    static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
  SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());

  auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
  if (!ScopeOrNone) {
    reportUnsupported(MI, "Unsupported atomic synchronization scope");
    return None;
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
    ScopeOrNone.getValue();

  if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
      ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
    reportUnsupported(MI, "Unsupported atomic address space");
    return None;
  }

  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace,
                     SIAtomicAddrSpace::ATOMIC, IsCrossAddressSpaceOrdering);
}
Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
    const MachineBasicBlock::iterator &MI) const {
  if (!(MI->mayLoad() && MI->mayStore()))
    return None;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}
SICacheControl::SICacheControl(const GCNSubtarget &ST) {
  TII = ST.getInstrInfo();
  IV = getIsaVersion(ST.getCPU());
}

/* static */
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
  GCNSubtarget::Generation Generation = ST.getGeneration();
  if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
    return make_unique<SIGfx6CacheControl>(ST);
  return make_unique<SIGfx7CacheControl>(ST);
}
bool SIGfx6CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // The scratch address space does not need the global memory caches to be
  // bypassed: all operations by the same thread are sequentially consistent,
  // and no other thread can access scratch memory. Other address spaces do
  // not have a cache.

  return Changed;
}
bool SIGfx6CacheControl::enableNonTemporal(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->mayLoad() ^ MI->mayStore());
  bool Changed = false;

  // Setting both GLC and SLC marks the access as nontemporal on these
  // generations.
  Changed |= enableGLCBit(MI);
  Changed |= enableSLCBit(MI);

  return Changed;
}
bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
                                               SIAtomicScope Scope,
                                               SIAtomicAddrSpace AddrSpace,
                                               Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // The scratch address space does not need the global memory cache to be
  // flushed; other address spaces do not have a cache.

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace,
                                    SIMemOp Op,
                                    bool IsCrossAddrSpaceOrdering,
                                    Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool LGKMCnt = false;
  bool EXPCnt = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      VMCnt = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L1 cache keeps all memory operations in order for
      // wavefronts in the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // An LDS waitcnt is only needed when also synchronizing with global
      // or GDS memory: LDS operations of all waves execute in a total
      // global ordering, but could be reordered with respect to later
      // global/GDS memory operations of the same wave.
      LGKMCnt = IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations in order for the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // A GDS waitcnt (tracked by EXPCnt on these generations) is only
      // needed when also synchronizing with global or LDS memory.
      EXPCnt = IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The GDS keeps all memory operations in order for the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt || EXPCnt) {
    unsigned WaitCntImmediate =
      AMDGPU::encodeWaitcnt(IV,
                            VMCnt ? 0 : getVmcntBitMask(IV),
                            EXPCnt ? 0 : getExpcntBitMask(IV),
                            LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
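// The waitcnt encoding convention above is worth spelling out: a counter
// that must drain is encoded as 0, while a counter to be ignored is encoded
// as its all-ones bit mask. Sketch (hypothetical helper) producing the
// immediate for "s_waitcnt vmcnt(0)":
static unsigned exampleWaitVmcntOnly(const GCNSubtarget &ST) {
  IsaVersion IV = getIsaVersion(ST.getCPU());
  return AMDGPU::encodeWaitcnt(IV,
                               /*Vmcnt=*/0,                             // wait
                               /*Expcnt=*/getExpcntBitMask(IV),         // ignore
                               /*Lgkmcnt=*/getLgkmcntBitMask(IV));      // ignore
}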
bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
                                               SIAtomicScope Scope,
                                               SIAtomicAddrSpace AddrSpace,
                                               Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();

  const unsigned Flush = STM.isAmdPalOS() || STM.isMesa3DOS()
                             ? AMDGPU::BUFFER_WBINVL1
                             : AMDGPU::BUFFER_WBINVL1_VOL;

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(Flush));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}
bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace());

    if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE);

    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getInstrAddrSpace(),
                                SIMemOp::LOAD,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::AFTER);
      Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace(),
                                           Position::AFTER);
    }

    return Changed;
  }

  // Atomic instructions do not have the nontemporal attribute.
  if (MOI.isNonTemporal()) {
    Changed |= CC->enableNonTemporal(MI);
    return Changed;
  }

  return Changed;
}
bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE);

    return Changed;
  }

  // Atomic instructions do not have the nontemporal attribute.
  if (MOI.isNonTemporal()) {
    Changed |= CC->enableNonTemporal(MI);
    return Changed;
  }

  return Changed;
}
bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  AtomicPseudoMIs.push_back(MI);
  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE);

    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace(),
                                           Position::BEFORE);

    return Changed;
  }

  return Changed;
}
bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                                 MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE);

    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::AFTER);
      Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace(),
                                           Position::AFTER);
    }

    return Changed;
  }

  return Changed;
}
bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;

  SIMemOpAccess MOA(MF);
  CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      if (const auto &MOI = MOA.getLoadInfo(MI))
        Changed |= expandLoad(MOI.getValue(), MI);
      else if (const auto &MOI = MOA.getStoreInfo(MI))
        Changed |= expandStore(MOI.getValue(), MI);
      else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(MOI.getValue(), MI);
      else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
        Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(), MI);
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}

INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}
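// Usage note (sketch, assuming the standard TargetPassConfig pattern; the
// actual wiring lives in AMDGPUTargetMachine.cpp and may differ in detail):
//
//   void GCNPassConfig::addPreEmitPass() {
//     addPass(&SIMemoryLegalizerID);
//     // ...
//   }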