|
LLVM
8.0.1
|
Insert wait instructions for memory reads and writes. More...
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <memory>
#include <utility>
#include <vector>
Go to the source code of this file.
Macros | |
| #define | DEBUG_TYPE "si-insert-waitcnts" |
| #define | CNT_MASK(t) (1u << (t)) |
Enumerations | |
| enum | InstCounterType |
| enum | WaitEventType |
| enum | RegisterMapping |
Functions | |
| DEBUG_COUNTER (ForceExpCounter, DEBUG_TYPE"-forceexp", "Force emit s_waitcnt expcnt(0) instrs") | |
| DEBUG_COUNTER (ForceLgkmCounter, DEBUG_TYPE"-forcelgkm", "Force emit s_waitcnt lgkmcnt(0) instrs") | |
| DEBUG_COUNTER (ForceVMCounter, DEBUG_TYPE"-forcevm", "Force emit s_waitcnt vmcnt(0) instrs") | |
| INITIALIZE_PASS_BEGIN (SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false, false) INITIALIZE_PASS_END(SIInsertWaitcnts | |
| static bool | readsVCCZ (const MachineInstr &MI) |
Variables | |
| static cl::opt< unsigned > | ForceEmitZeroFlag ("amdgpu-waitcnt-forcezero", cl::desc("Force all waitcnt instrs to be emitted as s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"), cl::init(0), cl::Hidden) |
| DEBUG_TYPE | |
| SI Insert | Waitcnts |
| SI Insert | false |
Insert wait instructions for memory reads and writes.
Memory reads and writes are issued asynchronously, so we need to insert S_WAITCNT instructions when we want to access any of their results or overwrite any register that's used asynchronously.
TODO: This pass currently keeps one timeline per hardware counter. A more finely-grained approach that keeps one timeline per event type could sometimes get away with generating weaker s_waitcnt instructions. For example, when both SMEM and LDS are in flight and we need to wait for the i-th-last LDS instruction, then an lgkmcnt(i) is actually sufficient, but the pass will currently generate a conservative lgkmcnt(0) because multiple event types are in flight.
Definition in file SIInsertWaitcnts.cpp.
| #define CNT_MASK | ( | t | ) | (1u << (t)) |
Definition at line 102 of file SIInsertWaitcnts.cpp.
Referenced by readsVCCZ().
| #define DEBUG_TYPE "si-insert-waitcnts" |
Definition at line 63 of file SIInsertWaitcnts.cpp.
| enum InstCounterType |
Definition at line 104 of file SIInsertWaitcnts.cpp.
| enum RegisterMapping |
Definition at line 156 of file SIInsertWaitcnts.cpp.
| enum WaitEventType |
Definition at line 128 of file SIInsertWaitcnts.cpp.
| DEBUG_COUNTER | ( | ForceExpCounter | , |
| DEBUG_TYPE"-forceexp" | , | ||
| "Force emit s_waitcnt expcnt(0) instrs" | |||
| ) |
| DEBUG_COUNTER | ( | ForceLgkmCounter | , |
| DEBUG_TYPE"-forcelgkm" | , | ||
| "Force emit s_waitcnt lgkmcnt(0) instrs" | |||
| ) |
| DEBUG_COUNTER | ( | ForceVMCounter | , |
| DEBUG_TYPE"-forcevm" | , | ||
| "Force emit s_waitcnt vmcnt(0) instrs" | |||
| ) |
| INITIALIZE_PASS_BEGIN | ( | SIInsertWaitcnts | , |
| DEBUG_TYPE | , | ||
| "SI Insert Waitcnts" | , | ||
| false | , | ||
| false | |||
| ) |
| static bool readsVCCZ | ( | const MachineInstr & | MI | ) |
static
Definition at line 790 of file SIInsertWaitcnts.cpp.
References llvm::AMDGPU::Waitcnt::allZero(), assert(), llvm::MachineBasicBlock::begin(), llvm::MachineFunction::begin(), llvm::BuildMI(), CNT_MASK, llvm::dbgs(), llvm::AMDGPU::decodeWaitcnt(), llvm::tgtok::Def, E, llvm::AMDGPU::encodeWaitcnt(), llvm::MachineBasicBlock::end(), llvm::MachineFunction::end(), llvm::MachineInstr::eraseFromParent(), llvm::SIInstrFlags::EXP, llvm::SIInstrFlags::EXP_CNT, AMDGPUAS::FLAT_ADDRESS, ForceEmitZeroFlag, llvm::MachineFunction::front(), llvm::AMDGPUSubtarget::get(), llvm::AMDGPU::getAtomicNoRetOp(), llvm::MachineInstr::getDebugLoc(), llvm::AMDGPU::getExpcntBitMask(), llvm::MachineBasicBlock::getFirstNonPHI(), llvm::MachineOperand::getImm(), llvm::MachineFunction::getInfo(), llvm::AMDGPU::getIsaVersion(), llvm::ilist_node_impl< OptionsT >::getIterator(), llvm::AMDGPU::getLgkmcntBitMask(), llvm::MachineBasicBlock::getNumber(), llvm::MachineInstr::getNumOperands(), llvm::MachineInstr::getOpcode(), llvm::MachineInstr::getOperand(), llvm::MachineInstr::getParent(), llvm::MachineOperand::getReg(), llvm::MachineFunction::getRegInfo(), llvm::GCNSubtarget::getRegisterInfo(), llvm::MachineFunction::getSubtarget(), llvm::AMDGPU::getVmcntBitMask(), I, llvm::AMDGPU::SendMsg::ID_GS_DONE, llvm::AMDGPU::SendMsg::ID_MASK_, llvm::MachineInstr::isDebugInstr(), llvm::MachineOperand::isUndef(), llvm::SIInstrInfo::isVMEM(), llvm::SIInstrFlags::LGKM_CNT, LLVM_DEBUG, AMDGPUAS::LOCAL_ADDRESS, llvm::max(), llvm::MachineInstr::mayLoad(), llvm::MachineInstr::mayStore(), llvm::MachineInstr::memoperands(), llvm::MachineInstr::memoperands_empty(), MI, Modified, llvm::MachineInstr::modifiesRegister(), MRI, Other, llvm::MachineInstr::print(), llvm::SmallVectorTemplateBase< T >::push_back(), llvm::report_fatal_error(), llvm::AMDGPUSubtarget::SEA_ISLANDS, llvm::MachineOperand::setImm(), llvm::ARM_MB::ST, llvm::MachineBasicBlock::successors(), T, TII, TRI, llvm::SIInstrFlags::VM_CNT, and llvm::sys::Wait().
| DEBUG_TYPE |
Definition at line 779 of file SIInsertWaitcnts.cpp.
| SI Insert false |
Definition at line 779 of file SIInsertWaitcnts.cpp.
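| static cl::opt< unsigned > ForceEmitZeroFlag ("amdgpu-waitcnt-forcezero", cl::desc("Force all waitcnt instrs to be emitted as s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"), cl::init(0), cl::Hidden) |
static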
Referenced by readsVCCZ().
| SI Insert Waitcnts |
Definition at line 779 of file SIInsertWaitcnts.cpp.
1.8.13