48 #define DEBUG_TYPE "amdgpu-lower-enqueued-block" 55 class AMDGPUOpenCLEnqueuedBlockLowering :
public ModulePass {
59 explicit AMDGPUOpenCLEnqueuedBlockLowering() :
ModulePass(ID) {}
62 bool runOnModule(
Module &M)
override;
73 "Lower OpenCL enqueued blocks",
false,
false)
76 return new AMDGPUOpenCLEnqueuedBlockLowering();
82 for (
auto U : F->
users()) {
83 if (
auto *CI = dyn_cast<CallInst>(&*U)) {
84 auto *Caller = CI->getParent()->getParent();
85 if (Callers.
insert(Caller).second)
94 if (
auto *
I = dyn_cast<Instruction>(U)) {
95 auto *
F =
I->getParent()->getParent();
100 if (!isa<Constant>(U))
102 for (
auto UU : U->
users())
106 bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(
Module &M) {
109 bool Changed =
false;
111 if (
F.hasFnAttribute(
"enqueued-block")) {
118 LLVM_DEBUG(
dbgs() <<
"found enqueued kernel: " <<
F.getName() <<
'\n');
130 for (
auto U :
F.users()) {
132 if (!isa<ConstantExpr>(UU))
135 auto *BitCast = cast<ConstantExpr>(UU);
138 F.addFnAttr(
"runtime-handle", RuntimeHandle);
145 for (
auto F : Callers) {
148 F->addFnAttr(
"calls-enqueue-kernel");
149 LLVM_DEBUG(
dbgs() <<
"mark enqueue_kernel caller:" <<
F->getName() <<
'\n');
static void collectCallers(Function *F, DenseSet< Function *> &Callers)
Collect direct or indirect callers of F and save them to Callers.
This class represents lattice values for constants.
A Module instance is used to store all the information related to an LLVM module. ...
Implements a dense probed hash-table based set.
Externally visible function.
char & AMDGPUOpenCLEnqueuedBlockLoweringID
static IntegerType * getInt64Ty(LLVMContext &C)
constexpr char RuntimeHandle[]
Key for Kernel::Attr::Metadata::mRuntimeHandle.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
amdgpu Simplify well known AMD library false Value Value const Twine & Name
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
LLVMContext & getContext() const
Get the global data context.
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< iterator > functions()
This file contains the declarations for the subclasses of Constant, which represent the different fla...
std::pair< iterator, bool > insert(const ValueT &V)
INITIALIZE_PASS(AMDGPUOpenCLEnqueuedBlockLowering, DEBUG_TYPE, "Lower OpenCL enqueued blocks", false, false) ModulePass *llvm
static void collectFunctionUsers(User *U, DenseSet< Function *> &Funcs)
If U is instruction or constant, collect functions which directly or indirectly use it...
Address space for global memory (RAT0, VTX0).
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Module.h This file contains the declarations for the Module class.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
iterator_range< user_iterator > users()
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const
Print the appropriate prefix and the specified global variable's name.
Calling convention for AMDGPU code object kernels.