LLVM8Doxygen/AMDGPULibCalls_8cpp_source.html

 //===- AMDGPULibCalls.cpp -------------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 /// \file
 /// This file does AMD library function optimizations.
 //
 //===----------------------------------------------------------------------===//

 #define DEBUG_TYPE "amdgpu-simplifylib"

 #include "AMDGPU.h"
 #include "AMDGPULibFunc.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ValueSymbolTable.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetOptions.h"
 #include <vector>
 #include <cmath>

 using namespace llvm;

 // Hidden boolean flag "amdgpu-prelink" (default: false). Within this file it
 // selects, in AMDGPULibCalls::getFunction(), between
 // AMDGPULibFunc::getOrInsertFunction() and AMDGPULibFunc::getFunction().
 static cl::opt<bool> EnablePreLink("amdgpu-prelink",
   cl::desc("Enable pre-link mode optimizations"),
   cl::init(false),
   cl::Hidden);

 // Hidden, comma-separated list flag "amdgpu-use-native" whose value is
 // optional. AMDGPULibCalls::initNativeFuncs() treats an entry equal to "all",
 // or a single empty entry, as a request covering every function.
 static cl::list<std::string> UseNative("amdgpu-use-native",
   cl::desc("Comma separated list of functions to replace with native, or all"),
   cl::CommaSeparated, cl::ValueOptional,
   cl::Hidden);

 // Decimal literals for common mathematical constants, written with more
 // digits than a double can hold.
 // Values of pi, e and sqrt(2)
 #define MATH_PI     3.14159265358979323846264338327950288419716939937511
 #define MATH_E      2.71828182845904523536028747135266249775724709369996
 #define MATH_SQRT2  1.41421356237309504880168872420969807856967187537695

 // Values of log2(e) and log10(e)
 #define MATH_LOG2E     1.4426950408889634073599246810018921374266459541529859
 #define MATH_LOG10E    0.4342944819032518276511289189166050822943970058036665
 // Value of log2(10)
 #define MATH_LOG2_10   3.3219280948873623478703194294893901758648313930245806
 // Value of 1 / log2(10)
 #define MATH_RLOG2_10  0.3010299956639811952137388947244930267681898814621085
 // Value of 1 / M_LOG2E_F = 1 / log2(e)
 #define MATH_RLOG2_E   0.6931471805599453094172321214581765680755001343602552

 namespace llvm {

 // Collection of AMD library-call simplifications. The public entry points
 // are fold(), initNativeFuncs() and useNative(); fold() and useNative() each
 // operate on a single call instruction.
 class AMDGPULibCalls {
 private:

   typedef llvm::AMDGPULibFunc FuncInfo;

   // -fuse-native.
   // Set by initNativeFuncs(); when true, useNativeFunc() answers true for
   // every function name.
   bool AllNative = false;

   // Return true if AllNative is set or F is listed in the UseNative option.
   bool useNativeFunc(const StringRef F) const;

   // Return a pointer (pointer expr) to the function if function definition with
   // "FuncName" exists. It may create a new function prototype in pre-link mode.
   Constant *getFunction(Module *M, const FuncInfo& fInfo);

   // Replace a normal function with its native version.
   bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);

   // Parse the mangled name FMangledName; FInfo receives the parsed
   // description. Returns the result of AMDGPULibFunc::parse().
   bool parseFunctionName(const StringRef& FMangledName,
                          FuncInfo *FInfo=nullptr /*out*/);

   // Table-driven fold: replace a call whose constant argument matches an
   // entry of the function's lookup table with the tabulated result.
   bool TDOFold(CallInst *CI, const FuncInfo &FInfo);

   /* Specialized optimizations */

   // recip (half or native)
   bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

   // divide (half or native)
   bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

   // pow/powr/pown
   bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

   // rootn
   bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

   // fma/mad
   bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

   // -fuse-native for sincos
   bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);

   // evaluate calls if calls' arguments are constants.
   bool evaluateScalarMathFunc(FuncInfo &FInfo, double& Res0,
     double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
   bool evaluateCall(CallInst *aCI, FuncInfo &FInfo);

   // exp
   bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

   // exp2
   bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

   // exp10
   bool fold_exp10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

   // log
   bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

   // log2
   bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

   // log10
   bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

   // sqrt
   bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

   // sin/cos
   bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);

   // __read_pipe/__write_pipe
   bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);

   // Get insertion point at entry.
   BasicBlock::iterator getEntryIns(CallInst * UI);
   // Insert an Alloc instruction.
   AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
   // Get a scalar native builtin single argument FP function
   Constant* getNativeFunction(Module* M, const FuncInfo &FInfo);

 protected:
   // The call currently being processed; assigned at the start of fold() and
   // useNative() and consumed by replaceCall().
   CallInst *CI;

   // True if the call carries the 'fast' flag or its enclosing function has
   // the "unsafe-fp-math" attribute set to "true".
   bool isUnsafeMath(const CallInst *CI) const;

   // Redirect every use of the member CI to With, then erase CI from its
   // parent. The member CI must not be used afterwards.
   void replaceCall(Value *With) {
     CI->replaceAllUsesWith(With);
     CI->eraseFromParent();
   }

 public:
   // Try to simplify the call CI. Returns false if nothing changed, true
   // otherwise.
   bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);

   // Compute AllNative from the UseNative command-line option.
   void initNativeFuncs();

   // Replace a normal math function call with that native version
   bool useNative(CallInst *CI);
 };

 } // end llvm namespace

 namespace {

   // Function pass that owns an AMDGPULibCalls simplifier and declares a
   // dependency on alias-analysis results. runOnFunction() is defined outside
   // the class body.
   class AMDGPUSimplifyLibCalls : public FunctionPass {

   AMDGPULibCalls Simplifier;

   // Copy of the target options handed to the constructor.
   const TargetOptions Options;

   public:
     static char ID; // Pass identification

     // Stores a copy of Opt and registers the pass with the global registry.
     AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions())
       : FunctionPass(ID), Options(Opt) {
       initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
     }

     // The pass requires AAResultsWrapperPass.
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequired<AAResultsWrapperPass>();
     }

     bool runOnFunction(Function &M) override;
   };

   // Function pass that owns an AMDGPULibCalls simplifier whose native
   // function selection is initialised at construction time.
   // runOnFunction() is defined outside the class body.
   class AMDGPUUseNativeCalls : public FunctionPass {

   AMDGPULibCalls Simplifier;

   public:
     static char ID; // Pass identification

     // Registers the pass and evaluates the UseNative option once, via
     // AMDGPULibCalls::initNativeFuncs().
     AMDGPUUseNativeCalls() : FunctionPass(ID) {
       initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry());
       Simplifier.initNativeFuncs();
     }

     bool runOnFunction(Function &F) override;
   };

 } // end anonymous namespace.

 // Storage for the pass identification members declared in the pass classes.
 char AMDGPUSimplifyLibCalls::ID = 0;
 char AMDGPUUseNativeCalls::ID = 0;

 // Register the simplification pass under the name "amdgpu-simplifylib" and
 // declare its dependency on AAResultsWrapperPass.
 INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
                       "Simplify well-known AMD library calls", false, false)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
                     "Simplify well-known AMD library calls", false, false)

 // Register the native-call replacement pass under the name
 // "amdgpu-usenative".
 INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
                 "Replace builtin math calls with that native versions.",
                 false, false)

 /// Emit a one-argument call to \p Callee through the builder \p B.
 /// If \p Callee is a Function, the new call adopts its calling convention.
 /// \returns the newly created call instruction.
 template <typename IRB>
 static CallInst *CreateCallEx(IRB &B, Value *Callee, Value *Arg,
                               const Twine &Name = "") {
   CallInst *NewCall = B.CreateCall(Callee, Arg, Name);
   auto *TargetFn = dyn_cast<Function>(Callee);
   if (!TargetFn)
     return NewCall;
   NewCall->setCallingConv(TargetFn->getCallingConv());
   return NewCall;
 }

 /// Emit a two-argument call to \p Callee through the builder \p B.
 /// If \p Callee is a Function, the new call adopts its calling convention.
 /// \returns the newly created call instruction.
 template <typename IRB>
 static CallInst *CreateCallEx2(IRB &B, Value *Callee, Value *Arg1, Value *Arg2,
                                const Twine &Name = "") {
   CallInst *NewCall = B.CreateCall(Callee, {Arg1, Arg2}, Name);
   auto *TargetFn = dyn_cast<Function>(Callee);
   if (!TargetFn)
     return NewCall;
   NewCall->setCallingConv(TargetFn->getCallingConv());
   return NewCall;
 }

 //  Data structures for table-driven optimizations.
 //  The tables built from TableEntry serve both f32 and f64 functions with
 //  1 input argument: all values are stored as double.

 // One known (input, result) pair of a one-argument function. Note the field
 // order: the result comes first, the input second.
 struct TableEntry {
   double   result;
   double   input;
 };

 /* a list of {result, input} */
 // One table per library function. An entry {r, x} states that the function
 // evaluated at x yields r. Positive and negative zero are listed as separate
 // inputs wherever both appear.
 static const TableEntry tbl_acos[] = {
   {MATH_PI/2.0, 0.0},
   {MATH_PI/2.0, -0.0},
   {0.0, 1.0},
   {MATH_PI, -1.0}
 };
 static const TableEntry tbl_acosh[] = {
   {0.0, 1.0}
 };
 static const TableEntry tbl_acospi[] = {
   {0.5, 0.0},
   {0.5, -0.0},
   {0.0, 1.0},
   {1.0, -1.0}
 };
 static const TableEntry tbl_asin[] = {
   {0.0, 0.0},
   {-0.0, -0.0},
   {MATH_PI/2.0, 1.0},
   {-MATH_PI/2.0, -1.0}
 };
 static const TableEntry tbl_asinh[] = {
   {0.0, 0.0},
   {-0.0, -0.0}
 };
 static const TableEntry tbl_asinpi[] = {
   {0.0, 0.0},
   {-0.0, -0.0},
   {0.5, 1.0},
   {-0.5, -1.0}
 };
 static const TableEntry tbl_atan[] = {
   {0.0, 0.0},
   {-0.0, -0.0},
   {MATH_PI/4.0, 1.0},
   {-MATH_PI/4.0, -1.0}
 };
 static const TableEntry tbl_atanh[] = {
   {0.0, 0.0},
   {-0.0, -0.0}
 };
 static const TableEntry tbl_atanpi[] = {
   {0.0, 0.0},
   {-0.0, -0.0},
   {0.25, 1.0},
   {-0.25, -1.0}
 };
 static const TableEntry tbl_cbrt[] = {
   {0.0, 0.0},
   {-0.0, -0.0},
   {1.0, 1.0},
   {-1.0, -1.0},
 };
 static const TableEntry tbl_cos[] = {
   {1.0, 0.0},
   {1.0, -0.0}
 };
 static const TableEntry tbl_cosh[] = {
   {1.0, 0.0},
   {1.0, -0.0}
 };
 static const TableEntry tbl_cospi[] = {
   {1.0, 0.0},
   {1.0, -0.0}
 };
 static const TableEntry tbl_erfc[] = {
   {1.0, 0.0},
   {1.0, -0.0}
 };
 static const TableEntry tbl_erf[] = {
   {0.0, 0.0},
   {-0.0, -0.0}
 };
 static const TableEntry tbl_exp[] = {
   {1.0, 0.0},
   {1.0, -0.0},
   {MATH_E, 1.0}
 };
 static const TableEntry tbl_exp2[] = {
   {1.0, 0.0},
   {1.0, -0.0},
   {2.0, 1.0}
 };
 static const TableEntry tbl_exp10[] = {
   {1.0, 0.0},
   {1.0, -0.0},
   {10.0, 1.0}
 };
 static const TableEntry tbl_expm1[] = {
   {0.0, 0.0},
   {-0.0, -0.0}
 };
 static const TableEntry tbl_log[] = {
   {0.0, 1.0},
   {1.0, MATH_E}
 };
 static const TableEntry tbl_log2[] = {
   {0.0, 1.0},
   {1.0, 2.0}
 };
 static const TableEntry tbl_log10[] = {
   {0.0, 1.0},
   {1.0, 10.0}
 };
 static const TableEntry tbl_rsqrt[] = {
   {1.0, 1.0},
   {1.0/MATH_SQRT2, 2.0}
 };
 static const TableEntry tbl_sin[] = {
   {0.0, 0.0},
   {-0.0, -0.0}
 };
 static const TableEntry tbl_sinh[] = {
   {0.0, 0.0},
   {-0.0, -0.0}
 };
 static const TableEntry tbl_sinpi[] = {
   {0.0, 0.0},
   {-0.0, -0.0}
 };
 static const TableEntry tbl_sqrt[] = {
   {0.0, 0.0},
   {1.0, 1.0},
   {MATH_SQRT2, 2.0}
 };
 static const TableEntry tbl_tan[] = {
   {0.0, 0.0},
   {-0.0, -0.0}
 };
 static const TableEntry tbl_tanh[] = {
   {0.0, 0.0},
   {-0.0, -0.0}
 };
 static const TableEntry tbl_tanpi[] = {
   {0.0, 0.0},
   {-0.0, -0.0}
 };
 // tgamma(n) == (n - 1)! for the small positive integers listed here.
 static const TableEntry tbl_tgamma[] = {
   {1.0, 1.0},
   {1.0, 2.0},
   {2.0, 3.0},
   {6.0, 4.0}
 };

 /// Report whether the library function identified by \p id is one of the
 /// functions this file is willing to replace with a native_ variant.
 static bool HasNative(AMDGPULibFunc::EFuncId id) {
   bool Supported = false;
   switch (id) {
   case AMDGPULibFunc::EI_DIVIDE:
   case AMDGPULibFunc::EI_COS:
   case AMDGPULibFunc::EI_EXP:
   case AMDGPULibFunc::EI_EXP2:
   case AMDGPULibFunc::EI_EXP10:
   case AMDGPULibFunc::EI_LOG:
   case AMDGPULibFunc::EI_LOG2:
   case AMDGPULibFunc::EI_LOG10:
   case AMDGPULibFunc::EI_POWR:
   case AMDGPULibFunc::EI_RECIP:
   case AMDGPULibFunc::EI_RSQRT:
   case AMDGPULibFunc::EI_SIN:
   case AMDGPULibFunc::EI_SINCOS:
   case AMDGPULibFunc::EI_SQRT:
   case AMDGPULibFunc::EI_TAN:
     Supported = true;
     break;
   default:
     // Every other function id has no native counterpart here.
     break;
   }
   return Supported;
 }

 /// Non-owning view of one lookup table: a pointer to its first entry plus
 /// the number of entries. A default-constructed TableRef is empty
 /// (size 0, null pointer).
 struct TableRef {
   size_t size = 0;
   const TableEntry *table = nullptr; // valid indices: 0 .. size - 1

   TableRef() = default;

   /// Wrap a statically sized array; N is deduced from the array type.
   template <size_t N>
   TableRef(const TableEntry (&tbl)[N]) : size(N), table(tbl) {}
 };

 /// Map a library function id to its table of known (result, input) pairs.
 /// Native variants (EI_N*) share the table of the regular function. Ids
 /// without a table yield an empty TableRef.
 static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
   // Each return relies on TableRef's converting constructor from an array.
   switch (id) {
   case AMDGPULibFunc::EI_ACOS:
     return tbl_acos;
   case AMDGPULibFunc::EI_ACOSH:
     return tbl_acosh;
   case AMDGPULibFunc::EI_ACOSPI:
     return tbl_acospi;
   case AMDGPULibFunc::EI_ASIN:
     return tbl_asin;
   case AMDGPULibFunc::EI_ASINH:
     return tbl_asinh;
   case AMDGPULibFunc::EI_ASINPI:
     return tbl_asinpi;
   case AMDGPULibFunc::EI_ATAN:
     return tbl_atan;
   case AMDGPULibFunc::EI_ATANH:
     return tbl_atanh;
   case AMDGPULibFunc::EI_ATANPI:
     return tbl_atanpi;
   case AMDGPULibFunc::EI_CBRT:
     return tbl_cbrt;
   case AMDGPULibFunc::EI_NCOS:
   case AMDGPULibFunc::EI_COS:
     return tbl_cos;
   case AMDGPULibFunc::EI_COSH:
     return tbl_cosh;
   case AMDGPULibFunc::EI_COSPI:
     return tbl_cospi;
   case AMDGPULibFunc::EI_ERFC:
     return tbl_erfc;
   case AMDGPULibFunc::EI_ERF:
     return tbl_erf;
   case AMDGPULibFunc::EI_EXP:
     return tbl_exp;
   case AMDGPULibFunc::EI_NEXP2:
   case AMDGPULibFunc::EI_EXP2:
     return tbl_exp2;
   case AMDGPULibFunc::EI_EXP10:
     return tbl_exp10;
   case AMDGPULibFunc::EI_EXPM1:
     return tbl_expm1;
   case AMDGPULibFunc::EI_LOG:
     return tbl_log;
   case AMDGPULibFunc::EI_NLOG2:
   case AMDGPULibFunc::EI_LOG2:
     return tbl_log2;
   case AMDGPULibFunc::EI_LOG10:
     return tbl_log10;
   case AMDGPULibFunc::EI_NRSQRT:
   case AMDGPULibFunc::EI_RSQRT:
     return tbl_rsqrt;
   case AMDGPULibFunc::EI_NSIN:
   case AMDGPULibFunc::EI_SIN:
     return tbl_sin;
   case AMDGPULibFunc::EI_SINH:
     return tbl_sinh;
   case AMDGPULibFunc::EI_SINPI:
     return tbl_sinpi;
   case AMDGPULibFunc::EI_NSQRT:
   case AMDGPULibFunc::EI_SQRT:
     return tbl_sqrt;
   case AMDGPULibFunc::EI_TAN:
     return tbl_tan;
   case AMDGPULibFunc::EI_TANH:
     return tbl_tanh;
   case AMDGPULibFunc::EI_TANPI:
     return tbl_tanpi;
   case AMDGPULibFunc::EI_TGAMMA:
     return tbl_tgamma;
   default:
     break;
   }
   return TableRef();
 }

 /// Vector size recorded for the first leading argument of \p FInfo.
 static inline int getVecSize(const AMDGPULibFunc& FInfo) {
   const auto &FirstLead = FInfo.getLeads()[0];
   return FirstLead.VectorSize;
 }

 /// Argument type recorded for the first leading argument of \p FInfo,
 /// converted to AMDGPULibFunc::EType.
 static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
   const auto &FirstLead = FInfo.getLeads()[0];
   return static_cast<AMDGPULibFunc::EType>(FirstLead.ArgType);
 }

 /// Look up the library function described by \p fInfo in module \p M.
 /// With the amdgpu-prelink option enabled the lookup goes through
 /// AMDGPULibFunc::getOrInsertFunction(); otherwise through
 /// AMDGPULibFunc::getFunction().
 Constant *AMDGPULibCalls::getFunction(Module *M, const FuncInfo& fInfo) {
   // If we are doing PreLinkOpt, the function is external. So it is safe to
   // use getOrInsertFunction() at this stage.
   if (EnablePreLink)
     return AMDGPULibFunc::getOrInsertFunction(M, fInfo);

   return AMDGPULibFunc::getFunction(M, fInfo);
 }

 bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName,
                                     FuncInfo *FInfo) {
   return AMDGPULibFunc::parse(FMangledName, *FInfo);
 }

 bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
   if (auto Op = dyn_cast<FPMathOperator>(CI))
     if (Op->isFast())
       return true;
   const Function *F = CI->getParent()->getParent();
   Attribute Attr = F->getFnAttribute("unsafe-fp-math");
   return Attr.getValueAsString() == "true";
 }

 bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
   return AllNative ||
          std::find(UseNative.begin(), UseNative.end(), F) != UseNative.end();
 }

 /// Compute AllNative from the UseNative option. It becomes true when "all"
 /// is requested, or when the option occurred on the command line with
 /// exactly one entry and that entry is empty.
 void AMDGPULibCalls::initNativeFuncs() {
   if (useNativeFunc("all")) {
     AllNative = true;
     return;
   }

   // The option was given without a value.
   const bool GivenWithoutValue = UseNative.getNumOccurrences() &&
                                  UseNative.size() == 1 &&
                                  UseNative.begin()->empty();
   AllNative = GivenWithoutValue;
 }

 /// Split a sincos call into separate native sin and native cos calls.
 ///
 /// Only applies when native versions of both "sin" and "cos" were requested
 /// and both native functions can be obtained. The cos result is stored
 /// through the call's second argument and the sin result replaces the call.
 /// \returns true if the call was replaced.
 bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
   const bool WantNativeSin = useNativeFunc("sin");
   const bool WantNativeCos = useNativeFunc("cos");
   if (!WantNativeSin || !WantNativeCos)
     return false;

   Module *Mod = aCI->getModule();
   Value *Angle = aCI->getArgOperand(0);

   // Describe the native functions using the argument type and vector size
   // of the original call.
   AMDGPULibFunc NativeFn;
   NativeFn.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
   NativeFn.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;

   NativeFn.setPrefix(AMDGPULibFunc::NATIVE);
   NativeFn.setId(AMDGPULibFunc::EI_SIN);
   Constant *SinFn = getFunction(Mod, NativeFn);

   NativeFn.setPrefix(AMDGPULibFunc::NATIVE);
   NativeFn.setId(AMDGPULibFunc::EI_COS);
   Constant *CosFn = getFunction(Mod, NativeFn);

   if (!SinFn || !CosFn)
     return false;

   // Both new calls are inserted immediately before the original call.
   Value *SinResult = CallInst::Create(SinFn, Angle, "splitsin", aCI);
   Value *CosResult = CallInst::Create(CosFn, Angle, "splitcos", aCI);
   new StoreInst(CosResult, aCI->getArgOperand(1), aCI);

   DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
                                       << " with native version of sin/cos");

   replaceCall(SinResult);
   return true;
 }

 /// Replace the callee of \p aCI with its native_ counterpart when requested.
 ///
 /// The call is left alone (false is returned) when it is indirect, when the
 /// callee name does not parse as a mangled, prefix-less library function,
 /// when the argument type is F64, when the function has no native version,
 /// or when the native version was not requested. sincos is handled
 /// separately by sincosUseNative().
 /// \returns true if the call was changed.
 bool AMDGPULibCalls::useNative(CallInst *aCI) {
   CI = aCI;
   Function *Callee = aCI->getCalledFunction();

   // Ignore indirect calls: there is no callee whose name could be parsed.
   if (!Callee)
     return false;

   FuncInfo FInfo;
   if (!parseFunctionName(Callee->getName(), &FInfo) || !FInfo.isMangled() ||
       FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
       getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
       !(AllNative || useNativeFunc(FInfo.getName()))) {
     return false;
   }

   if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
     return sincosUseNative(aCI, FInfo);

   // Same function description, but with the native prefix.
   FInfo.setPrefix(AMDGPULibFunc::NATIVE);
   Constant *F = getFunction(aCI->getModule(), FInfo);
   if (!F)
     return false;

   aCI->setCalledFunction(F);
   DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
                                       << " with native version");
   return true;
 }

 // Clang lowers the OpenCL read_pipe builtin to a call of __read_pipe_2 or
 // __read_pipe_4 (2 or 4 being the original argument count) and appends the
 // packet type's size and alignment as two extra arguments. The library
 // provides optimized variants for packets whose size and alignment are the
 // same power of two. This function rewrites __read_pipe_2 into
 // __read_pipe_2_N in that case, N being the packet size in bytes
 // (N = 1, 2, 4, 8, ..., 128). __read_pipe_4, __write_pipe_2 and
 // __write_pipe_4 are treated the same way.
 //
 // Returns true if the call was replaced.
 bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
                                           FuncInfo &FInfo) {
   Function *Callee = CI->getCalledFunction();
   if (!Callee->isDeclaration())
     return false;

   assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
   Module *M = Callee->getParent();
   LLVMContext &Ctx = M->getContext();

   // Only the 4- and 6-operand forms (2 or 4 original arguments plus size
   // and alignment) are handled.
   const unsigned NumArg = CI->getNumArgOperands();
   if (NumArg != 4 && NumArg != 6)
     return false;

   // Size and alignment are the last two operands; both must be constants.
   auto *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 2));
   auto *AlignC = dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 1));
   if (!SizeC || !AlignC)
     return false;

   const unsigned Size = SizeC->getZExtValue();
   const unsigned PacketAlign = AlignC->getZExtValue();
   if (Size != PacketAlign || !isPowerOf2_32(Size))
     return false;

   // Packets of up to 8 bytes are accessed as one integer of that width,
   // larger ones as a vector of 64-bit integers.
   Type *PtrElemTy;
   if (Size > 8)
     PtrElemTy = VectorType::get(Type::getInt64Ty(Ctx), Size / 8);
   else
     PtrElemTy = Type::getIntNTy(Ctx, Size * 8);

   // The packet pointer sits right before the size/alignment pair.
   const unsigned PtrArgLoc = NumArg - 3;
   Value *PtrArg = CI->getArgOperand(PtrArgLoc);
   const unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
   auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);

   // The specialized function keeps the leading operands, takes the retyped
   // pointer, and drops the size and alignment operands.
   SmallVector<llvm::Type *, 6> ArgTys;
   SmallVector<Value *, 6> Args;
   for (unsigned I = 0; I != PtrArgLoc; ++I) {
     Value *Operand = CI->getArgOperand(I);
     ArgTys.push_back(Operand->getType());
     Args.push_back(Operand);
   }
   ArgTys.push_back(PtrTy);

   const std::string NewName =
       Callee->getName().str() + "_" + std::to_string(Size);
   auto *FTy = FunctionType::get(Callee->getReturnType(),
                                 ArrayRef<Type *>(ArgTys), false);
   AMDGPULibFunc NewLibFunc(NewName, FTy);
   auto *F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
   if (!F)
     return false;

   // Nothing is inserted into the IR before this point.
   Args.push_back(B.CreatePointerCast(PtrArg, PtrTy));

   auto *NCI = B.CreateCall(F, Args);
   NCI->setAttributes(CI->getAttributes());
   CI->replaceAllUsesWith(NCI);
   CI->dropAllReferences();
   CI->eraseFromParent();

   return true;
 }

 // Try every applicable simplification on the call CI.
 // Returns true if the call was changed and false if it was left untouched.
 bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
   this->CI = CI;

   // Indirect calls have no callee to inspect.
   Function *Callee = CI->getCalledFunction();
   if (!Callee)
     return false;

   FuncInfo FInfo;
   if (!parseFunctionName(Callee->getName(), &FInfo))
     return false;

   // The call must pass as many arguments as the parsed function expects.
   if (CI->getNumArgOperands() != FInfo.getNumArgs())
     return false;

   // New instructions are inserted at the position of the call itself.
   BasicBlock *BB = CI->getParent();
   IRBuilder<> B(BB->getContext());
   B.SetInsertPoint(BB, CI->getIterator());

   // Instructions created by the builder inherit the call's fast-math flags.
   if (const auto *FPOp = dyn_cast<const FPMathOperator>(CI))
     B.setFastMathFlags(FPOp->getFastMathFlags());

   // Table-driven folding comes first.
   if (TDOFold(CI, FInfo))
     return true;

   // Under unsafe-math, evaluate calls if possible.
   // According to Brian Sumner, we can do this for all f32 function calls
   // using host's double function calls.
   if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
     return true;

   // Specialized optimizations for individual functions.
   switch (FInfo.getId()) {
   case AMDGPULibFunc::EI_RECIP:
     assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
              FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
             "recip must be an either native or half function");
     // Vector versions are skipped.
     return getVecSize(FInfo) == 1 && fold_recip(CI, B, FInfo);

   case AMDGPULibFunc::EI_DIVIDE:
     assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
              FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
             "divide must be an either native or half function");
     // Vector versions are skipped.
     return getVecSize(FInfo) == 1 && fold_divide(CI, B, FInfo);

   case AMDGPULibFunc::EI_POW:
   case AMDGPULibFunc::EI_POWR:
   case AMDGPULibFunc::EI_POWN:
     return fold_pow(CI, B, FInfo);

   case AMDGPULibFunc::EI_ROOTN:
     // Vector versions are skipped.
     return getVecSize(FInfo) == 1 && fold_rootn(CI, B, FInfo);

   case AMDGPULibFunc::EI_FMA:
   case AMDGPULibFunc::EI_MAD:
   case AMDGPULibFunc::EI_NFMA:
     // Vector versions are skipped.
     return getVecSize(FInfo) == 1 && fold_fma_mad(CI, B, FInfo);

   case AMDGPULibFunc::EI_SQRT:
     return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);

   case AMDGPULibFunc::EI_COS:
   case AMDGPULibFunc::EI_SIN: {
     // Only prefix-less f32/f64 sin and cos are candidates for sincos.
     const bool IsF32OrF64 = getArgType(FInfo) == AMDGPULibFunc::F32 ||
                             getArgType(FInfo) == AMDGPULibFunc::F64;
     if (IsF32OrF64 && FInfo.getPrefix() == AMDGPULibFunc::NOPFX)
       return fold_sincos(CI, B, AA);
     return false;
   }

   case AMDGPULibFunc::EI_READ_PIPE_2:
   case AMDGPULibFunc::EI_READ_PIPE_4:
   case AMDGPULibFunc::EI_WRITE_PIPE_2:
   case AMDGPULibFunc::EI_WRITE_PIPE_4:
     return fold_read_write_pipe(CI, B, FInfo);

   default:
     return false;
   }
 }

 /// Table-driven optimization: if the first argument of \p CI is a constant
 /// whose value (for vectors: every element) exactly matches an input listed
 /// in the function's lookup table, replace the call with the tabulated
 /// result.
 ///
 /// The replacement constant is built with the element type of the argument
 /// rather than by assuming that every non-f32 argument is f64, so the
 /// result type matches for any floating-point element type (e.g. half,
 /// should such calls reach this point).
 ///
 /// \returns true if the call was replaced; false if the function has no
 ///          table, the argument is not a suitable constant, or a value is
 ///          not in the table.
 bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
   const TableRef tr = getOptTable(FInfo.getId());
   if (tr.size == 0)
     return false;

   // Find the table entry whose input is exactly the value of C, if any.
   auto lookup = [&tr](const ConstantFP *C) -> const TableEntry * {
     for (size_t i = 0; i != tr.size; ++i)
       if (C->isExactlyValue(tr.table[i].input))
         return &tr.table[i];
     return nullptr;
   };

   Value *opr0 = CI->getArgOperand(0);

   if (getVecSize(FInfo) > 1) {
     auto *CV = dyn_cast<ConstantDataVector>(opr0);
     if (!CV)
       return false;

     // The constant must have as many elements as the parsed signature says.
     const unsigned NumElts = CV->getNumElements();
     if (NumElts != (unsigned)getVecSize(FInfo))
       return false;

     Type *EltTy = CV->getElementType();
     SmallVector<Constant *, 16> Results;
     for (unsigned eltNo = 0; eltNo != NumElts; ++eltNo) {
       auto *eltval = dyn_cast<ConstantFP>(CV->getElementAsConstant(eltNo));
       // Non-FP elements cannot be folded; bail out in all build modes.
       if (!eltval)
         return false;
       const TableEntry *Entry = lookup(eltval);
       if (!Entry) {
         // This vector constants not handled yet.
         return false;
       }
       // Convert the tabulated double to the argument's element type.
       Results.push_back(ConstantFP::get(EltTy, Entry->result));
     }

     Constant *nval = ConstantVector::get(Results);
     // Never replace the call with a value of a different type.
     if (nval->getType() != CI->getType())
       return false;

     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
     replaceCall(nval);
     return true;
   }

   // Scalar version
   if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
     if (const TableEntry *Entry = lookup(CF)) {
       Value *nval = ConstantFP::get(CF->getType(), Entry->result);
       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
       replaceCall(nval);
       return true;
     }
   }

   return false;
 }

 /// Redirect \p CI to the native_ version of its callee.
 ///
 /// Applies only to prefix-less f32 functions for which HasNative() is true
 /// and whose native function can be obtained through getFunction().
 /// \returns true if the callee was changed.
 bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
   Module *M = CI->getModule();

   const bool Eligible = getArgType(FInfo) == AMDGPULibFunc::F32 &&
                         FInfo.getPrefix() == AMDGPULibFunc::NOPFX &&
                         HasNative(FInfo.getId());
   if (!Eligible)
     return false;

   // Same function description, but with the native prefix.
   AMDGPULibFunc NativeInfo = FInfo;
   NativeInfo.setPrefix(AMDGPULibFunc::NATIVE);

   Constant *NativeFn = getFunction(M, NativeInfo);
   if (!NativeFn)
     return false;

   // The call is printed before and after its callee is swapped.
   LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");

   CI->setCalledFunction(NativeFn);

   LLVM_DEBUG(dbgs() << *CI << '\n');

   return true;
 }

 //  [native_]half_recip(c) ==> 1.0/c
 //
 //  Only applies when the argument c is a floating-point constant.
 //  Returns true if the call was replaced.
 bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
                                 const FuncInfo &FInfo) {
   Value *Arg = CI->getArgOperand(0);
   auto *ConstArg = dyn_cast<ConstantFP>(Arg);
   if (!ConstArg)
     return false;

   // Emit an ordinary fdiv and leave the constant evaluation to a later
   // InstCombine run, so that infinities and subnormals need not be
   // handled here.
   Constant *One = ConstantFP::get(ConstArg->getType(), 1.0);
   Value *nval = B.CreateFDiv(One, Arg, "recip2div");
   LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
   replaceCall(nval);
   return true;
 }

 //  [native_]half_divide(x, c) ==> x/c
 //
 //  The division is emitted as x * (1.0 / c). It applies when c is a
 //  floating-point constant and either x is a constant as well or the
 //  function operates on f32. Returns true if the call was replaced.
 bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
                                  const FuncInfo &FInfo) {
   Value *Num = CI->getArgOperand(0);
   Value *Den = CI->getArgOperand(1);

   // The divisor must always be a constant.
   if (!isa<ConstantFP>(Den))
     return false;

   // A non-constant dividend is accepted for f32 divides only.
   if (!isa<ConstantFP>(Num) && getArgType(FInfo) != AMDGPULibFunc::F32)
     return false;

   Value *Recip = B.CreateFDiv(ConstantFP::get(Den->getType(), 1.0),
                               Den, "__div2recip");
   Value *Quotient = B.CreateFMul(Num, Recip, "__div2mul");
   replaceCall(Quotient);
   return true;
 }

 namespace llvm {
 /// Base-2 logarithm of \p Value.
 ///
 /// Uses the C library's log2() when the feature-test macros announce it;
 /// otherwise divides the natural logarithm by ln(2).
 static double log2(double Value) {
 #if _XOPEN_SOURCE >= 600 || _ISOC99_SOURCE || _POSIX_C_SOURCE >= 200112L
   return ::log2(Value);
 #else
   // 0.693147180559945309417 is ln(2).
   const double NaturalLog = log(Value);
   return NaturalLog / 0.693147180559945309417;
 #endif
 }
 }

 bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
                               const FuncInfo &FInfo) {
   assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
           FInfo.getId() == AMDGPULibFunc::EI_POWR ||
           FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
          "fold_pow: encounter a wrong function call");

   Value *opr0, *opr1;
   ConstantFP *CF;
   ConstantInt *CINT;
   ConstantAggregateZero *CZero;
   Type *eltType;

   opr0 = CI->getArgOperand(0);
   opr1 = CI->getArgOperand(1);
   CZero = dyn_cast<ConstantAggregateZero>(opr1);
   if (getVecSize(FInfo) == 1) {
     eltType = opr0->getType();
     CF = dyn_cast<ConstantFP>(opr1);
     CINT = dyn_cast<ConstantInt>(opr1);
   } else {
     VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
     assert(VTy && "Oprand of vector function should be of vectortype");
     eltType = VTy->getElementType();
     ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);

     // Now, only Handle vector const whose elements have the same value.
     CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
     CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
   }

   // No unsafe math , no constant argument, do nothing
   if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
     return false;

   // 0x1111111 means that we don't do anything for this call.
   int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);

   if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
     //  pow/powr/pown(x, 0) == 1
     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
     Constant *cnval = ConstantFP::get(eltType, 1.0);
     if (getVecSize(FInfo) > 1) {
       cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
     }
     replaceCall(cnval);
     return true;
   }
   if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
     // pow/powr/pown(x, 1.0) = x
     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
     replaceCall(opr0);
     return true;
   }
   if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
     // pow/powr/pown(x, 2.0) = x*x
     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0
                       << "\n");
     Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
     replaceCall(nval);
     return true;
   }
   if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
     // pow/powr/pown(x, -1.0) = 1.0/x
     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n");
     Constant *cnval = ConstantFP::get(eltType, 1.0);
     if (getVecSize(FInfo) > 1) {
       cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
     }
     Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
     replaceCall(nval);
     return true;
   }

   Module *M = CI->getModule();
   if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
     // pow[r](x, [-]0.5) = sqrt(x)
     bool issqrt = CF->isExactlyValue(0.5);
     if (Constant *FPExpr = getFunction(M,
         AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
                              : AMDGPULibFunc::EI_RSQRT, FInfo))) {
       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
                         << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
       Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
                                                         : "__pow2rsqrt");
       replaceCall(nval);
       return true;
     }
   }

   if (!isUnsafeMath(CI))
     return false;

   // Unsafe Math optimization

   // Remember that ci_opr1 is set if opr1 is integral
   if (CF) {
     double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
                     ? (double)CF->getValueAPF().convertToFloat()
                     : CF->getValueAPF().convertToDouble();
     int ival = (int)dval;
     if ((double)ival == dval) {
       ci_opr1 = ival;
     } else
       ci_opr1 = 0x11111111;
   }

   // pow/powr/pown(x, c) = [1/](x*x*..x); where
   //   trunc(c) == c && the number of x == c && |c| <= 12
   unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
   if (abs_opr1 <= 12) {
     Constant *cnval;
     Value *nval;
     if (abs_opr1 == 0) {
       cnval = ConstantFP::get(eltType, 1.0);
       if (getVecSize(FInfo) > 1) {
         cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
       }
       nval = cnval;
     } else {
       Value *valx2 = nullptr;
       nval = nullptr;
       while (abs_opr1 > 0) {
         valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
         if (abs_opr1 & 1) {
           nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
         }
         abs_opr1 >>= 1;
       }
     }

     if (ci_opr1 < 0) {
       cnval = ConstantFP::get(eltType, 1.0);
       if (getVecSize(FInfo) > 1) {
         cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
       }
       nval = B.CreateFDiv(cnval, nval, "__1powprod");
     }
     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
                       << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
                       << ")\n");
     replaceCall(nval);
     return true;
   }

   // powr ---> exp2(y * log2(x))
   // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
   Constant *ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2,
                                                    FInfo));
   if (!ExpExpr)
     return false;

   bool needlog = false;
   bool needabs = false;
   bool needcopysign = false;
   Constant *cnval = nullptr;
   if (getVecSize(FInfo) == 1) {
     CF = dyn_cast<ConstantFP>(opr0);

     if (CF) {
       double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
                    ? (double)CF->getValueAPF().convertToFloat()
                    : CF->getValueAPF().convertToDouble();

       V = log2(std::abs(V));
       cnval = ConstantFP::get(eltType, V);
       needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
                      CF->isNegative();
     } else {
       needlog = true;
       needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
                                (!CF || CF->isNegative());
     }
   } else {
     ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);

     if (!CDV) {
       needlog = true;
       needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
     } else {
       assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
               "Wrong vector size detected");

       SmallVector<double, 0> DVal;
       for (int i=0; i < getVecSize(FInfo); ++i) {
         double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
                      ? (double)CDV->getElementAsFloat(i)
                      : CDV->getElementAsDouble(i);
         if (V < 0.0) needcopysign = true;
         V = log2(std::abs(V));
         DVal.push_back(V);
       }
       if (getArgType(FInfo) == AMDGPULibFunc::F32) {
         SmallVector<float, 0> FVal;
         for (unsigned i=0; i < DVal.size(); ++i) {
           FVal.push_back((float)DVal[i]);
         }
         ArrayRef<float> tmp(FVal);
         cnval = ConstantDataVector::get(M->getContext(), tmp);
       } else {
         ArrayRef<double> tmp(DVal);
         cnval = ConstantDataVector::get(M->getContext(), tmp);
       }
     }
   }

   if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
     // We cannot handle corner cases for a general pow() function, give up
     // unless y is a constant integral value. Then proceed as if it were pown.
     if (getVecSize(FInfo) == 1) {
       if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
         double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
                    ? (double)CF->getValueAPF().convertToFloat()
                    : CF->getValueAPF().convertToDouble();
         if (y != (double)(int64_t)y)
           return false;
       } else
         return false;
     } else {
       if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
         for (int i=0; i < getVecSize(FInfo); ++i) {
           double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
                      ? (double)CDV->getElementAsFloat(i)
                      : CDV->getElementAsDouble(i);
           if (y != (double)(int64_t)y)
             return false;
         }
       } else
         return false;
     }
   }

   Value *nval;
   if (needabs) {
     Constant *AbsExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS,
                                                      FInfo));
     if (!AbsExpr)
       return false;
     nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
   } else {
     nval = cnval ? cnval : opr0;
   }
   if (needlog) {
     Constant *LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2,
                                                      FInfo));
     if (!LogExpr)
       return false;
     nval = CreateCallEx(B,LogExpr, nval, "__log2");
   }

   if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
     // convert int(32) to fp(f32 or f64)
     opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
   }
   nval = B.CreateFMul(opr1, nval, "__ylogx");
   nval = CreateCallEx(B,ExpExpr, nval, "__exp2");

   if (needcopysign) {
     Value *opr_n;
     Type* rTy = opr0->getType();
     Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
     Type *nTy = nTyS;
     if (const VectorType *vTy = dyn_cast<VectorType>(rTy))
       nTy = VectorType::get(nTyS, vTy->getNumElements());
     unsigned size = nTy->getScalarSizeInBits();
     opr_n = CI->getArgOperand(1);
     if (opr_n->getType()->isIntegerTy())
       opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
     else
       opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");

     Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
     sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
     nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
     nval = B.CreateBitCast(nval, opr0->getType());
   }

   LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
                     << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
   replaceCall(nval);

   return true;
 }

 // Simplify rootn(x, n) when n is a constant integer:
 //   rootn(x,  1) -> x
 //   rootn(x,  2) -> sqrt(x)
 //   rootn(x,  3) -> cbrt(x)
 //   rootn(x, -1) -> 1.0 / x
 //   rootn(x, -2) -> rsqrt(x)
 // Returns true if the call was replaced. Returns false when n is not a
 // ConstantInt, is none of the values above, or getFunction() did not yield
 // the required library function.
 bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
                                 const FuncInfo &FInfo) {
   Value *opr0 = CI->getArgOperand(0);
   Value *opr1 = CI->getArgOperand(1);

   ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
   if (!CINT)
     return false;

   Module *M = CI->getModule();
   switch ((int)CINT->getSExtValue()) {
   case 1: // rootn(x, 1) = x
     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
     replaceCall(opr0);
     return true;

   case 2: // rootn(x, 2) = sqrt(x)
     if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT,
                                                         FInfo))) {
       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
       replaceCall(nval);
       return true;
     }
     break;

   case 3: // rootn(x, 3) = cbrt(x)
     if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT,
                                                         FInfo))) {
       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
       replaceCall(nval);
       return true;
     }
     break;

   case -1: { // rootn(x, -1) = 1.0/x
     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
     Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
                                opr0,
                                "__rootn2div");
     replaceCall(nval);
     return true;
   }

   case -2: // rootn(x, -2) = rsqrt(x)
     if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT,
                                                         FInfo))) {
       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
                         << ")\n");
       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
       replaceCall(nval);
       return true;
     }
     break;

   default:
     break;
   }

   // Either the exponent is not one we simplify, or the library function was
   // unavailable; leave the call untouched.
   return false;
 }

 // Simplify fma/mad(a, b, c) when an operand is a ConstantFP. The rules are
 // tried in this order and the first match wins:
 //   a == 0 or b == 0 -> c
 //   a == 1           -> b + c
 //   b == 1           -> a + c
 //   c == 0           -> a * b
 // Returns true if the call was replaced, false otherwise.
 bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
                                   const FuncInfo &FInfo) {
   Value *MulLHS = CI->getArgOperand(0);
   Value *MulRHS = CI->getArgOperand(1);
   Value *Addend = CI->getArgOperand(2);

   // These are null whenever the operand is not a ConstantFP.
   ConstantFP *ConstLHS = dyn_cast<ConstantFP>(MulLHS);
   ConstantFP *ConstRHS = dyn_cast<ConstantFP>(MulRHS);

   auto IsConstZero = [](ConstantFP *C) { return C && C->isZero(); };
   auto IsConstOne = [](ConstantFP *C) {
     return C && C->isExactlyValue(1.0f);
   };

   if (IsConstZero(ConstLHS) || IsConstZero(ConstRHS)) {
     // fma/mad(a, b, c) = c if a=0 || b=0
     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *Addend << "\n");
     replaceCall(Addend);
     return true;
   }

   // If one multiplicand is exactly 1 the product is just the other one.
   // The first multiplicand is tested before the second.
   Value *Remaining = nullptr;
   if (IsConstOne(ConstLHS))
     Remaining = MulRHS;
   else if (IsConstOne(ConstRHS))
     Remaining = MulLHS;

   if (Remaining) {
     // fma/mad(a, b, c) = b+c if a=1, a+c if b=1
     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *Remaining << " + "
                       << *Addend << "\n");
     Value *Sum = B.CreateFAdd(Remaining, Addend, "fmaadd");
     replaceCall(Sum);
     return true;
   }

   if (IsConstZero(dyn_cast<ConstantFP>(Addend))) {
     // fma/mad(a, b, c) = a*b if c=0
     LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *MulLHS << " * "
                       << *MulRHS << "\n");
     Value *Product = B.CreateFMul(MulLHS, MulRHS, "fmamul");
     replaceCall(Product);
     return true;
   }

   return false;
 }

 // Look up the NATIVE-prefixed variant of the library function described by
 // FInfo in module M. Returns nullptr when the argument type is F64 or when
 // HasNative() reports no native version for this function id; otherwise
 // returns whatever getFunction() yields for the native-prefixed descriptor.
 Constant* AMDGPULibCalls::getNativeFunction(Module* M, const FuncInfo& FInfo) {
   if (getArgType(FInfo) == AMDGPULibFunc::F64)
     return nullptr;
   if (!HasNative(FInfo.getId()))
     return nullptr;

   // Work on a copy so the caller's descriptor keeps its original prefix.
   FuncInfo NativeInfo = FInfo;
   NativeInfo.setPrefix(AMDGPULibFunc::NATIVE);
   return getFunction(M, NativeInfo);
 }

 // Replace sqrt(x) with the native sqrt variant. Only applies when the
 // argument type is F32, the vector size is 1, and the call is not already
 // NATIVE-prefixed. Returns true if the call was replaced.
 bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
                                const FuncInfo &FInfo) {
   if (getArgType(FInfo) != AMDGPULibFunc::F32)
     return false;
   if (getVecSize(FInfo) != 1)
     return false;
   if (FInfo.getPrefix() == AMDGPULibFunc::NATIVE)
     return false;

   Constant *NativeSqrt = getNativeFunction(
       CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo));
   if (!NativeSqrt)
     return false;

   Value *Arg = CI->getArgOperand(0);
   LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
                     << "sqrt(" << *Arg << ")\n");
   Value *Replacement = CreateCallEx(B,NativeSqrt, Arg, "__sqrt");
   replaceCall(Replacement);
   return true;
 }

 // fold sin, cos -> sincos.
 //
 // CI is a call to sin or cos. If the same basic block contains, within
 // MaxScan instructions before CI, a call to the complementary function
 // (cos for sin, sin for cos) on the same argument value, both calls are
 // replaced by a single sincos call: the sincos return value stands in for
 // the sin call, and a load from the alloca passed as the second sincos
 // argument stands in for the cos call. Both original calls are erased.
 //
 // Returns true if the pair was merged. Returns false if the callee name
 // cannot be parsed, no matching partner call is found, or the sincos
 // function cannot be obtained.
 bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
                                  AliasAnalysis *AA) {
   AMDGPULibFunc fInfo;
   if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
     return false;

   assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
          fInfo.getId() == AMDGPULibFunc::EI_COS);
   bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;

   Value *CArgVal = CI->getArgOperand(0);
   BasicBlock * const CBB = CI->getParent();

   // Upper bound on instructions scanned, both for the available-load lookup
   // and for the backward search for the partner call.
   int const MaxScan = 30;

   { // fold in load value.
     // If the argument is a load in this block and an equivalent value is
     // already available, use that value instead so that both calls share
     // the same argument Value.
     LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
     if (LI && LI->getParent() == CBB) {
       BasicBlock::iterator BBI = LI->getIterator();
       Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
       if (AvailableVal) {
         CArgVal->replaceAllUsesWith(AvailableVal);
         if (CArgVal->getNumUses() == 0)
           LI->eraseFromParent();
         // Re-read the operand: it now refers to AvailableVal.
         CArgVal = CI->getArgOperand(0);
       }
     }
   }

   Module *M = CI->getModule();
   // Flip the id so that fInfo describes the partner function, then mangle it
   // to get the callee name to look for.
   fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN);
   std::string const PairName = fInfo.mangle();

   // UI becomes the partner call, if one is found.
   CallInst *UI = nullptr;
   for (User* U : CArgVal->users()) {
     // Only other calls in the same basic block are candidates.
     CallInst *XI = dyn_cast_or_null<CallInst>(U);
     if (!XI || XI == CI || XI->getParent() != CBB)
       continue;

     Function *UCallee = XI->getCalledFunction();
     if (!UCallee || !UCallee->getName().equals(PairName))
       continue;

     // Walk backwards from the instruction just before CI, at most MaxScan
     // steps, looking for XI.
     // NOTE(review): the loop condition stops as soon as BBI reaches
     // CBB->begin() without comparing that instruction, so a partner call
     // that is the very first instruction of the block is not matched;
     // confirm whether that is intended.
     BasicBlock::iterator BBI = CI->getIterator();
     if (BBI == CI->getParent()->begin())
       break;
     --BBI;
     for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
       if (cast<Instruction>(BBI) == XI) {
         UI = XI;
         break;
       }
     }
     if (UI) break;
   }

   if (!UI) return false;

   // Merge the sin and cos.

   // for OpenCL 2.0 we have only generic implementation of sincos
   // function.
   AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
   nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
   Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf));
   if (!Fsincos) return false;

   // insertAlloca moves the builder's insert point, so remember the current
   // one; it is restored below in the !isSin case.
   BasicBlock::iterator ItOld = B.GetInsertPoint();
   AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
   // The sincos call is emitted at the position of the earlier call (UI).
   B.SetInsertPoint(UI);

   Value *P = Alloc;
   Type *PTy = Fsincos->getFunctionType()->getParamType(1);
   // The allocaInst allocates the memory in private address space. This need
   // to be bitcasted to point to the address space of cos pointer type.
   // In OpenCL 2.0 this is generic, while in 1.2 that is private.
   if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
     P = B.CreateAddrSpaceCast(Alloc, PTy);
   CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);

   LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "
                     << *Call << "\n");

   if (!isSin) { // CI->cos, UI->sin
     // The sin result is the sincos return value. The cos result is loaded
     // from the alloca at the builder's original insert point.
     B.SetInsertPoint(&*ItOld);
     UI->replaceAllUsesWith(&*Call);
     Instruction *Reload = B.CreateLoad(Alloc);
     CI->replaceAllUsesWith(Reload);
     UI->eraseFromParent();
     CI->eraseFromParent();
   } else { // CI->sin, UI->cos
     // The builder is still positioned at UI, so the load of the cos result
     // is emitted right after the sincos call; the sin result is the call.
     Instruction *Reload = B.CreateLoad(Alloc);
     UI->replaceAllUsesWith(Reload);
     CI->replaceAllUsesWith(Call);
     UI->eraseFromParent();
     CI->eraseFromParent();
   }
   return true;
 }

 // Return an iterator to the first instruction of the entry block of the
 // function that contains UI.
 BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
   BasicBlock *EntryBB = &UI->getParent()->getParent()->getEntryBlock();
   assert(EntryBB && "Entry block not found!");
   return EntryBB->begin();
 }

 // Insert an AllocaInst at the beginning of the entry block of the function
 // containing UI. The allocated type is the return type of UI's callee, the
 // name is prefix followed by UI's name, and the alignment is set to the
 // type's alloc size from the module's data layout.
 // Side effect: the insert point of B is left at the start of the entry block.
 AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
                                          const char *prefix) {
   Function *Callee = UI->getCalledFunction();
   Type *SlotTy = Callee->getReturnType();

   B.SetInsertPoint(&*getEntryIns(UI));
   AllocaInst *Slot = B.CreateAlloca(SlotTy, 0,
     std::string(prefix) + UI->getName());

   const auto &Layout = Callee->getParent()->getDataLayout();
   Slot->setAlignment(Layout.getTypeAllocSize(SlotTy));
   return Slot;
 }

 // Evaluate one scalar instance of the math library function identified by
 // FInfo on the host, in double precision.
 //
 // copr0/copr1/copr2 are the (possibly null) constant operands. Operands that
 // are ConstantFP are converted to double (through float unless the argument
 // type is F64); any other operand, including null, is seen as 0.0 by the
 // floating-point cases. pown and rootn read their second operand as a
 // ConstantInt instead and fail if it is not one.
 //
 // On success the result is stored in Res0 (and the cosine in Res1 for
 // sincos) and true is returned. Returns false for function ids that are not
 // handled here; Res0/Res1 are then left untouched.
 bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo,
                                             double& Res0, double& Res1,
                                             Constant *copr0, Constant *copr1,
                                             Constant *copr2) {
   // By default, opr0/opr1/opr2 hold values of float/double type.
   // If an operand is not float/double, each function has to handle that
   // operand separately.
   const bool IsF64 = getArgType(FInfo) == AMDGPULibFunc::F64;
   auto toDouble = [IsF64](Constant *C) -> double {
     ConstantFP *FP = dyn_cast_or_null<ConstantFP>(C);
     if (!FP)
       return 0.0;
     return IsF64 ? FP->getValueAPF().convertToDouble()
                  : (double)FP->getValueAPF().convertToFloat();
   };
   const double opr0 = toDouble(copr0);
   const double opr1 = toDouble(copr1);
   const double opr2 = toDouble(copr2);

   switch (FInfo.getId()) {
   default:
     break;

   case AMDGPULibFunc::EI_ACOS:
     Res0 = std::acos(opr0);
     return true;

   case AMDGPULibFunc::EI_ACOSH:
     // Use the library routine rather than log(x + sqrt(x*x - 1)), which
     // overflows in x*x for large x.
     Res0 = std::acosh(opr0);
     return true;

   case AMDGPULibFunc::EI_ACOSPI:
     Res0 = std::acos(opr0) / MATH_PI;
     return true;

   case AMDGPULibFunc::EI_ASIN:
     Res0 = std::asin(opr0);
     return true;

   case AMDGPULibFunc::EI_ASINH:
     // Use the library routine rather than log(x + sqrt(x*x + 1)), which
     // suffers from cancellation for negative x.
     Res0 = std::asinh(opr0);
     return true;

   case AMDGPULibFunc::EI_ASINPI:
     Res0 = std::asin(opr0) / MATH_PI;
     return true;

   case AMDGPULibFunc::EI_ATAN:
     Res0 = std::atan(opr0);
     return true;

   case AMDGPULibFunc::EI_ATANH:
     // atanh(x) == (log(1+x) - log(1-x))/2. The previous code evaluated
     // log(x-1), which is NaN over the whole domain |x| < 1.
     Res0 = std::atanh(opr0);
     return true;

   case AMDGPULibFunc::EI_ATANPI:
     Res0 = std::atan(opr0) / MATH_PI;
     return true;

   case AMDGPULibFunc::EI_CBRT:
     // std::cbrt handles negative arguments directly.
     Res0 = std::cbrt(opr0);
     return true;

   case AMDGPULibFunc::EI_COS:
     Res0 = std::cos(opr0);
     return true;

   case AMDGPULibFunc::EI_COSH:
     Res0 = std::cosh(opr0);
     return true;

   case AMDGPULibFunc::EI_COSPI:
     Res0 = std::cos(MATH_PI * opr0);
     return true;

   case AMDGPULibFunc::EI_EXP:
     Res0 = std::exp(opr0);
     return true;

   case AMDGPULibFunc::EI_EXP2:
     Res0 = std::exp2(opr0);
     return true;

   case AMDGPULibFunc::EI_EXP10:
     Res0 = std::pow(10.0, opr0);
     return true;

   case AMDGPULibFunc::EI_EXPM1:
     // std::expm1 avoids the cancellation of exp(x) - 1.0 near zero.
     Res0 = std::expm1(opr0);
     return true;

   case AMDGPULibFunc::EI_LOG:
     Res0 = std::log(opr0);
     return true;

   case AMDGPULibFunc::EI_LOG2:
     Res0 = std::log2(opr0);
     return true;

   case AMDGPULibFunc::EI_LOG10:
     Res0 = std::log10(opr0);
     return true;

   case AMDGPULibFunc::EI_RSQRT:
     Res0 = 1.0 / std::sqrt(opr0);
     return true;

   case AMDGPULibFunc::EI_SIN:
     Res0 = std::sin(opr0);
     return true;

   case AMDGPULibFunc::EI_SINH:
     Res0 = std::sinh(opr0);
     return true;

   case AMDGPULibFunc::EI_SINPI:
     Res0 = std::sin(MATH_PI * opr0);
     return true;

   case AMDGPULibFunc::EI_SQRT:
     Res0 = std::sqrt(opr0);
     return true;

   case AMDGPULibFunc::EI_TAN:
     Res0 = std::tan(opr0);
     return true;

   case AMDGPULibFunc::EI_TANH:
     Res0 = std::tanh(opr0);
     return true;

   case AMDGPULibFunc::EI_TANPI:
     Res0 = std::tan(MATH_PI * opr0);
     return true;

   case AMDGPULibFunc::EI_RECIP:
     Res0 = 1.0 / opr0;
     return true;

   // two-arg functions
   case AMDGPULibFunc::EI_DIVIDE:
     Res0 = opr0 / opr1;
     return true;

   case AMDGPULibFunc::EI_POW:
   case AMDGPULibFunc::EI_POWR:
     Res0 = std::pow(opr0, opr1);
     return true;

   case AMDGPULibFunc::EI_POWN: {
     // The exponent of pown is an integer operand.
     if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
       double val = (double)iopr1->getSExtValue();
       Res0 = std::pow(opr0, val);
       return true;
     }
     return false;
   }

   case AMDGPULibFunc::EI_ROOTN: {
     // The root index of rootn is an integer operand.
     if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
       double val = (double)iopr1->getSExtValue();
       Res0 = std::pow(opr0, 1.0 / val);
       return true;
     }
     return false;
   }

   // with ptr arg
   case AMDGPULibFunc::EI_SINCOS:
     Res0 = std::sin(opr0);
     Res1 = std::cos(opr0);
     return true;

   // three-arg functions
   case AMDGPULibFunc::EI_FMA:
   case AMDGPULibFunc::EI_MAD:
     Res0 = opr0 * opr1 + opr2;
     return true;
   }

   // Function id not handled by the host evaluator.
   return false;
 }

 // Constant-fold the library call aCI by evaluating it on the host.
 //
 // Folding is attempted only when the call has at most three arguments and
 // every argument is a Constant; the one exception is the second (pointer)
 // argument of sincos, which may be non-constant. Each lane is evaluated with
 // evaluateScalarMathFunc(). On success the call is replaced by the folded
 // constant via replaceCall(); for sincos the second result is additionally
 // stored through the call's pointer argument by a store inserted before aCI.
 //
 // Returns true if the call was folded, false otherwise.
 //
 // NOTE(review): the result type and context are taken from the member CI
 // while operands come from aCI; presumably both refer to the same
 // instruction - confirm against the caller.
 // NOTE(review): vector operands that are Constants but not
 // ConstantDataVector are passed down as null elements, which
 // evaluateScalarMathFunc() reads as 0.0 - confirm this is intended.
 bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
   int numArgs = (int)aCI->getNumArgOperands();
   if (numArgs > 3)
     return false;

   const bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);

   Constant *copr0 = nullptr;
   Constant *copr1 = nullptr;
   Constant *copr2 = nullptr;
   if (numArgs > 0) {
     if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
       return false;
   }

   if (numArgs > 1) {
     // The second argument of sincos is the output pointer and does not have
     // to be constant.
     if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
       if (!hasTwoResults)
         return false;
     }
   }

   if (numArgs > 2) {
     if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
       return false;
   }

   // At this point, all arguments to aCI that must be constant are constants.

   // max vector size is 16, and sincos will generate two results.
   const int VecSize = getVecSize(FInfo);
   if (VecSize > 16)
     return false; // would overrun the fixed-size result buffers below
   double DVal0[16], DVal1[16];
   if (VecSize == 1) {
     if (!evaluateScalarMathFunc(FInfo, DVal0[0],
                                 DVal1[0], copr0, copr1, copr2)) {
       return false;
     }
   } else {
     ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
     ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
     ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
     for (int i=0; i < VecSize; ++i) {
       Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
       Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
       Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
       if (!evaluateScalarMathFunc(FInfo, DVal0[i],
                                   DVal1[i], celt0, celt1, celt2)) {
         return false;
       }
     }
   }

   LLVMContext &context = CI->getParent()->getParent()->getContext();
   Constant *nval0 = nullptr;
   Constant *nval1 = nullptr;
   if (VecSize == 1) {
     nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
     if (hasTwoResults)
       nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
   } else {
     if (getArgType(FInfo) == AMDGPULibFunc::F32) {
       // Narrow the double results to float, lane by lane.
       SmallVector <float, 0> FVal0, FVal1;
       for (int i=0; i < VecSize; ++i)
         FVal0.push_back((float)DVal0[i]);
       ArrayRef<float> tmp0(FVal0);
       nval0 = ConstantDataVector::get(context, tmp0);
       if (hasTwoResults) {
         for (int i=0; i < VecSize; ++i)
           FVal1.push_back((float)DVal1[i]);
         ArrayRef<float> tmp1(FVal1);
         nval1 = ConstantDataVector::get(context, tmp1);
       }
     } else {
       // Pass the length explicitly: constructing the ArrayRef from the array
       // itself would always produce 16 elements, regardless of VecSize, and
       // would include uninitialized lanes.
       ArrayRef<double> tmp0(DVal0, VecSize);
       nval0 = ConstantDataVector::get(context, tmp0);
       if (hasTwoResults) {
         ArrayRef<double> tmp1(DVal1, VecSize);
         nval1 = ConstantDataVector::get(context, tmp1);
       }
     }
   }

   if (hasTwoResults) {
     // sincos
     assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
            "math function with ptr arg not supported yet");
     // Store the second result through the pointer argument, before the call.
     new StoreInst(nval1, aCI->getArgOperand(1), aCI);
   }

   replaceCall(nval0);
   return true;
 }

 // Public interface to the Simplify LibCalls pass: allocate a new
 // AMDGPUSimplifyLibCalls pass constructed from Opt and return it.
 FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt) {
   FunctionPass *SimplifyPass = new AMDGPUSimplifyLibCalls(Opt);
   return SimplifyPass;
 }

 // Public interface to the Use Native Calls pass: allocate a new
 // AMDGPUUseNativeCalls pass and return it.
 FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
   FunctionPass *NativePass = new AMDGPUUseNativeCalls();
   return NativePass;
 }

 // Translate the fast-math settings in Options into string function
 // attributes on F:
 //   UnsafeFPMath or NoInfsFPMath -> "no-infs-fp-math"="true"
 //   UnsafeFPMath or NoNaNsFPMath -> "no-nans-fp-math"="true"
 //   UnsafeFPMath                 -> "less-precise-fpmad"="true" and
 //                                   "unsafe-fp-math"="true"
 // Returns true if at least one attribute was added to F.
 static bool setFastFlags(Function &F, const TargetOptions &Options) {
   const bool Unsafe = Options.UnsafeFPMath;
   const bool WantNoInfs = Unsafe || Options.NoInfsFPMath;
   const bool WantNoNaNs = Unsafe || Options.NoNaNsFPMath;

   AttrBuilder Attrs;
   if (WantNoInfs)
     Attrs.addAttribute("no-infs-fp-math", "true");
   if (WantNoNaNs)
     Attrs.addAttribute("no-nans-fp-math", "true");
   if (Unsafe) {
     Attrs.addAttribute("less-precise-fpmad", "true");
     Attrs.addAttribute("unsafe-fp-math", "true");
   }

   // Nothing requested: leave F untouched and report no change.
   if (!Attrs.hasAttributes())
     return false;

   F.addAttributes(AttributeList::FunctionIndex, Attrs);
   return true;
 }

 // Run the library-call simplifier over every direct call in F.
 // Unless pre-link mode is enabled, the fast-math function attributes derived
 // from Options are applied to F first. Returns true if F was changed, either
 // by the attribute update or by a successful fold.
 bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
   if (skipFunction(F))
     return false;

   auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

   LLVM_DEBUG(dbgs() << "AMDIC: process function ";
              F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);

   bool Changed = false;
   if (!EnablePreLink)
     Changed = setFastFlags(F, Options);

   for (BasicBlock &Block : F) {
     BasicBlock::iterator Cur = Block.begin();
     const BasicBlock::iterator End = Block.end();
     while (Cur != End) {
       // Step past the instruction before trying to fold it, so the iterator
       // stays valid if folding removes the call.
       CallInst *Call = dyn_cast<CallInst>(&*Cur);
       ++Cur;

       // Ignore non-calls and indirect calls.
       if (!Call || !Call->getCalledFunction())
         continue;

       LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *Call << "\n";
                  dbgs().flush());
       Changed |= Simplifier.fold(Call, AA);
     }
   }
   return Changed;
 }

 // Offer every direct call in F to Simplifier.useNative(). Does nothing when
 // the function is skipped or the amdgpu-use-native list is empty.
 // Returns true if any call was changed.
 bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
   if (skipFunction(F))
     return false;
   if (UseNative.empty())
     return false;

   bool Changed = false;
   for (BasicBlock &Block : F) {
     BasicBlock::iterator Cur = Block.begin();
     const BasicBlock::iterator End = Block.end();
     while (Cur != End) {
       // Step past the instruction first so the iterator stays valid if the
       // call is replaced.
       CallInst *Call = dyn_cast<CallInst>(&*Cur);
       ++Cur;

       // Ignore non-calls and indirect calls.
       if (!Call || !Call->getCalledFunction())
         continue;

       Changed |= Simplifier.useNative(Call);
     }
   }
   return Changed;
 }
llvm::ConstantDataVector
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double, and whose elements are just simple data values (i.e.
Definition: Constants.h:762

llvm::AMDGPULibFuncBase::EI_EXP
Definition: AMDGPULibFunc.h:85

llvm::AMDGPULibFuncBase::EI_ATANH
Definition: AMDGPULibFunc.h:53

llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:68

llvm::AMDGPULibFuncBase::EI_TANH
Definition: AMDGPULibFunc.h:194

llvm::AMDGPU::HSAMD::Kernel::Arg::Key::Align
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
Definition: AMDGPUMetadata.h:161

llvm::TargetOptions
Definition: TargetOptions.h:107

Loads.h

llvm::Intrinsic::log
Definition: Intrinsics.h:190

tbl_sqrt
static const TableEntry tbl_sqrt[]
Definition: AMDGPULibCalls.cpp:366

tbl_log10
static const TableEntry tbl_log10[]
Definition: AMDGPULibCalls.cpp:346

llvm::AMDGPULibFuncBase::EI_SINH
Definition: AMDGPULibFunc.h:173

llvm::errs
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:854

llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:32

llvm::AMDGPULibFuncBase::EI_ASINPI
Definition: AMDGPULibFunc.h:47

Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:72

llvm::IRBuilder::CreateAlloca
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
Definition: IRBuilder.h:1344

llvm::User::dropAllReferences
void dropAllReferences()
Drop all references to operands.
Definition: User.h:295

llvm::AMDGPULibFunc::getId
EFuncId getId() const
Definition: AMDGPULibFunc.h:373

Instructions.h

llvm::IRBuilder::CreateAddrSpaceCast
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1737

llvm
This class represents lattice values for constants.
Definition: AllocatorList.h:24

llvm::AMDGPULibFuncBase::EI_WRITE_PIPE_4
Definition: AMDGPULibFunc.h:241

llvm::FunctionType::getParamType
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135

llvm::ConstantDataSequential::getElementAsConstant
Constant * getElementAsConstant(unsigned i) const
Return a Constant for a specified index's element.
Definition: Constants.cpp:2761

DerivedTypes.h

llvm::IRBuilder::CreateLoad
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1357

llvm::Module
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65

llvm::AMDGPULibFuncBase::HALF
Definition: AMDGPULibFunc.h:249

llvm::AMDGPULibFuncBase::EI_EXPM1
Definition: AMDGPULibFunc.h:88

llvm::AllocaInst::setAlignment
void setAlignment(unsigned Align)
Definition: Instructions.cpp:1103

llvm::cl::ValueOptional
Definition: CommandLine.h:138

tbl_cospi
static const TableEntry tbl_cospi[]
Definition: AMDGPULibCalls.cpp:307

simplifylib
amdgpu simplifylib
Definition: AMDGPULibCalls.cpp:212

llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1481

llvm::AMDGPULibFuncBase::EI_POWN
Definition: AMDGPULibFunc.h:151

llvm::cl::list
Definition: CommandLine.h:1492

ValueSymbolTable.h

tbl_cbrt
static const TableEntry tbl_cbrt[]
Definition: AMDGPULibCalls.cpp:293

llvm::AMDGPULibFunc::parse
static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr)
Definition: AMDGPULibFunc.cpp:686

llvm::SmallVectorTemplateBase< T >::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:218

llvm::initializeAMDGPUSimplifyLibCallsPass
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)

llvm::AMDGPULibFuncBase::EI_EXP2
Definition: AMDGPULibFunc.h:87

Debug.h

llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1438

AMDGPU.h

llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:630

llvm::AMDGPULibCalls::isUnsafeMath
bool isUnsafeMath(const CallInst *CI) const
Definition: AMDGPULibCalls.cpp:488

llvm::AMDGPULibFuncBase::EFuncId
EFuncId
Definition: AMDGPULibFunc.h:23

llvm::APFloat::convertToFloat
float convertToFloat() const
Definition: APFloat.h:1098

llvm::AMDGPULibFuncBase::EI_TGAMMA
Definition: AMDGPULibFunc.h:196

AMDGPUAS::PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:261

llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:705

llvm::AMDGPULibCalls::initNativeFuncs
void initNativeFuncs()
Definition: AMDGPULibCalls.cpp:502

F
F(f)

llvm::AMDGPULibFuncBase::EType
EType
Definition: AMDGPULibFunc.h:252

llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503

llvm::Function
Definition: Function.h:60

llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:168

tbl_erf
static const TableEntry tbl_erf[]
Definition: AMDGPULibCalls.cpp:315

llvm::AMDGPULibFuncBase::EI_ERF
Definition: AMDGPULibFunc.h:83

llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177

llvm::AttrBuilder::addAttribute
AttrBuilder & addAttribute(Attribute::AttrKind Val)
Add an attribute to the builder.
Definition: Attributes.cpp:1340

llvm::User
Definition: User.h:45

llvm::cl::CommaSeparated
Definition: CommandLine.h:176

llvm::AMDGPULibFuncBase::EI_NRSQRT
Definition: AMDGPULibFunc.h:227

llvm::AMDGPULibFunc::setPrefix
void setPrefix(ENamePrefix PFX)
Definition: AMDGPULibFunc.h:390

llvm::AMDGPULibFuncBase::EI_SINCOS
Definition: AMDGPULibFunc.h:172

llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:33

llvm::cl::Hidden
Definition: CommandLine.h:145

llvm::Intrinsic::sqrt
Definition: Intrinsics.h:262

tbl_acosh
static const TableEntry tbl_acosh[]
Definition: AMDGPULibCalls.cpp:252

llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:269

DEBUG_WITH_TYPE
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:65

llvm::IRBuilderBase::getInt32Ty
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:347

llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1135

tbl_atanh
static const TableEntry tbl_atanh[]
Definition: AMDGPULibCalls.cpp:283

llvm::AMDGPULibFunc::getName
std::string getName() const
Get unmangled name for mangled library function and name for unmangled library function.
Definition: AMDGPULibFunc.h:371

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:66

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51

Name
amdgpu Simplify well known AMD library false Value Value const Twine & Name
Definition: AMDGPULibCalls.cpp:221

llvm::AttrBuilder::hasAttributes
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.cpp:1533

llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:371

llvm::AMDGPULibFuncBase::EI_COSPI
Definition: AMDGPULibFunc.h:76

llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81

llvm::IRBuilderBase::getInt64Ty
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:352

llvm::Intrinsic::cos
Definition: Intrinsics.h:90

llvm::FindAvailableLoadedValue
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, AliasAnalysis *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:321

llvm::Module::getContext
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244

StringSet.h

llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743

llvm::AMDGPULibFunc::getLeads
Param * getLeads()
Get leading parameters for mangled lib functions.
Definition: AMDGPULibFunc.cpp:1047

llvm::CallBase::setCalledFunction
void setCalledFunction(Value *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1210

llvm::Intrinsic::sin
Definition: Intrinsics.h:258

tbl_sinh
static const TableEntry tbl_sinh[]
Definition: AMDGPULibCalls.cpp:358

false
Definition: StackSlotColoring.cpp:142

llvm::AMDGPULibFunc::getNumArgs
unsigned getNumArgs() const
Definition: AMDGPULibFunc.h:372

llvm::Instruction
Definition: Instruction.h:44

llvm::AMDGPULibCalls
Definition: AMDGPULibCalls.cpp:64

INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib", "Simplify well-known AMD library calls", false, false) INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls

llvm::ConstantAggregateZero
All zero aggregate value.
Definition: Constants.h:341

llvm::AMDGPULibFunc::setId
void setId(EFuncId Id)
Definition: AMDGPULibFunc.h:380

llvm::IRBuilder::CreateBitCast
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1732

llvm::TargetOptions::NoNaNsFPMath
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
Definition: TargetOptions.h:150

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245

false
amdgpu Simplify well known AMD library false
Definition: AMDGPULibCalls.cpp:212

llvm::Intrinsic::ID
ID
Definition: Intrinsics.h:37

tbl_sinpi
static const TableEntry tbl_sinpi[]
Definition: AMDGPULibCalls.cpp:362

llvm::ConstantFP::isNegative
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:309

tbl_asinh
static const TableEntry tbl_asinh[]
Definition: AMDGPULibCalls.cpp:267

tbl_tanpi
static const TableEntry tbl_tanpi[]
Definition: AMDGPULibCalls.cpp:379

llvm::AMDGPULibFuncBase::EI_SINPI
Definition: AMDGPULibFunc.h:174

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33

llvm::AMDGPULibFuncBase::EI_ATAN
Definition: AMDGPULibFunc.h:50

llvm::IRBuilder::CreateSIToFP
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1709

llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:321

llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429

llvm::AMDGPULibCalls::CI
CallInst * CI
Definition: AMDGPULibCalls.cpp:146

llvm::AMDGPULibFuncBase::EI_MAD
Definition: AMDGPULibFunc.h:134

llvm::IRBuilder::CreateZExtOrBitCast
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1742

Callee
amdgpu Simplify well known AMD library false Value * Callee
Definition: AMDGPULibCalls.cpp:220

llvm::Intrinsic::pow
Definition: Intrinsics.h:243

llvm::IRBuilderBase::SetInsertPoint
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:127

tbl_log2
static const TableEntry tbl_log2[]
Definition: AMDGPULibCalls.cpp:342

TableRef::TableRef
TableRef()
Definition: AMDGPULibCalls.cpp:417

llvm::AMDGPULibFuncBase::Param::PtrKind
unsigned char PtrKind
Definition: AMDGPULibFunc.h:294

llvm::IRBuilder::CreateOr
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1182

getArgType
static AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo)
Definition: AMDGPULibCalls.cpp:471

llvm::AttributeList::FunctionIndex
Definition: Attributes.h:331

llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:640

runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:66

P
#define P(N)

llvm::AMDGPULibFuncBase::EI_LOG10
Definition: AMDGPULibFunc.h:130

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423

TableRef::TableRef
TableRef(const TableEntry(&tbl)[N])
Definition: AMDGPULibCalls.cpp:420

llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:169

llvm::AMDGPULibCalls::fold
bool fold(CallInst *CI, AliasAnalysis *AA=nullptr)
Definition: AMDGPULibCalls.cpp:636

llvm::TargetOptions::UnsafeFPMath
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
Definition: TargetOptions.h:138

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

llvm::IRBuilder::CreateFMul
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1247

llvm::AAResults
Definition: AliasAnalysis.h:289

tbl_exp2
static const TableEntry tbl_exp2[]
Definition: AMDGPULibCalls.cpp:324

llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:429

llvm::AMDGPULibFuncBase::EI_SQRT
Definition: AMDGPULibFunc.h:176

llvm::Value::hasName
bool hasName() const
Definition: Value.h:251

llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58

llvm::AMDGPULibFunc::getFunction
static Function * getFunction(llvm::Module *M, const AMDGPULibFunc &fInfo)
Definition: AMDGPULibFunc.cpp:950

llvm::ConstantDataVector::getSplatValue
Constant * getSplatValue() const
If this is a splat constant, meaning that all of the elements have the same value, return that value.
Definition: Constants.cpp:2798

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69

llvm::AttrBuilder
Definition: Attributes.h:693

AMDGPUAS::FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:255

llvm::AMDGPULibFuncBase::EI_ROOTN
Definition: AMDGPULibFunc.h:162

E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:42

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

llvm::AMDGPULibFuncBase::Param::VectorSize
unsigned char VectorSize
Definition: AMDGPULibFunc.h:293

llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264

llvm::cl::desc
Definition: CommandLine.h:394

EnablePreLink
static cl::opt< bool > EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)

llvm::APFloat::convertToDouble
double convertToDouble() const
Definition: APFloat.h:1097

llvm::ConstantDataSequential::getElementAsFloat
float getElementAsFloat(unsigned i) const
If this is a sequential container of floats, return the specified element as a float.
Definition: Constants.cpp:2749

tbl_atan
static const TableEntry tbl_atan[]
Definition: AMDGPULibCalls.cpp:277

tbl_sin
static const TableEntry tbl_sin[]
Definition: AMDGPULibCalls.cpp:354

llvm::AMDGPULibFunc::isMangled
bool isMangled() const
Definition: AMDGPULibFunc.h:379

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:43

tbl_rsqrt
static const TableEntry tbl_rsqrt[]
Definition: AMDGPULibCalls.cpp:350

llvm::Intrinsic::exp
Definition: Intrinsics.h:114

tbl_acospi
static const TableEntry tbl_acospi[]
Definition: AMDGPULibCalls.cpp:255

llvm::CallBase::setCallingConv
void setCallingConv(CallingConv::ID CC)
Definition: InstrTypes.h:1229

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285

llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type *> Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:297

llvm::AMDGPULibFunc
Wrapper class for AMDGPULibFuncImpl.
Definition: AMDGPULibFunc.h:357

tbl_expm1
static const TableEntry tbl_expm1[]
Definition: AMDGPULibCalls.cpp:334

llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82

getVecSize
static int getVecSize(const AMDGPULibFunc &FInfo)
Definition: AMDGPULibCalls.cpp:467

llvm::AMDGPULibFuncBase::EI_ACOSH
Definition: AMDGPULibFunc.h:40

llvm::ConstantDataVector::getSplat
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:2638

llvm::TargetOptions::NoInfsFPMath
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
Definition: TargetOptions.h:144

llvm::AMDGPULibFuncBase::EI_RSQRT
Definition: AMDGPULibFunc.h:165

llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:850

llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:193

llvm::AMDGPULibFuncBase::getEPtrKindFromAddrSpace
static unsigned getEPtrKindFromAddrSpace(unsigned AS)
Definition: AMDGPULibFunc.h:312

llvm::AMDGPULibFuncBase::EI_TANPI
Definition: AMDGPULibFunc.h:195

llvm::AMDGPULibFuncBase::EI_ERFC
Definition: AMDGPULibFunc.h:84

llvm::AMDGPULibFuncBase::F64
Definition: AMDGPULibFunc.h:272

llvm::AMDGPULibFuncBase::NATIVE
Definition: AMDGPULibFunc.h:248

llvm::SmallVectorBase::size
size_t size() const
Definition: SmallVector.h:53

llvm::initializeAMDGPUUseNativeCallsPass
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)

llvm::find
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1207

llvm::AMDGPULibFuncBase::EI_LOG
Definition: AMDGPULibFunc.h:129

llvm::Value::printAsOperand
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:4225

INITIALIZE_PASS_END
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Definition: RegBankSelect.cpp:69

tbl_atanpi
static const TableEntry tbl_atanpi[]
Definition: AMDGPULibCalls.cpp:287

llvm::AMDGPULibFuncBase::EI_ASINH
Definition: AMDGPULibFunc.h:46

llvm::AMDGPULibFuncBase::EI_READ_PIPE_2
Definition: AMDGPULibFunc.h:238

llvm::Attribute
Definition: Attributes.h:51

llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:303

tbl_exp10
static const TableEntry tbl_exp10[]
Definition: AMDGPULibCalls.cpp:329

llvm::AMDGPULibFuncBase::EI_NCOS
Definition: AMDGPULibFunc.h:222

llvm::AMDGPULibFuncBase::EI_NLOG2
Definition: AMDGPULibFunc.h:225

llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:58

llvm::AMDGPULibFuncBase::EI_COSH
Definition: AMDGPULibFunc.h:75

llvm::AMDGPULibFuncBase::EI_RECIP
Definition: AMDGPULibFunc.h:155

INITIALIZE_PASS
TargetPassConfig.
Definition: TargetPassConfig.cpp:281

llvm::AMDGPULibFuncBase::EI_CBRT
Definition: AMDGPULibFunc.h:67

llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:84

StringRef.h

tbl_asinpi
static const TableEntry tbl_asinpi[]
Definition: AMDGPULibCalls.cpp:271

llvm::AMDGPULibFuncBase::EI_POW
Definition: AMDGPULibFunc.h:150

TargetOptions.h

setFastFlags
static bool setFastFlags(Function &F, const TargetOptions &Options)
Definition: AMDGPULibCalls.cpp:1690

llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130

llvm::AMDGPULibFunc::getPrefix
ENamePrefix getPrefix() const
Definition: AMDGPULibFunc.h:374

llvm::AMDGPULibFuncBase::EI_NFMA
Definition: AMDGPULibFunc.h:224

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847

llvm::FPMathOperator
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:240

Module.h
Module.h This file contains the declarations for the Module class.

llvm::Type::getIntNTy
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition: Type.cpp:180

llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:685

TableRef::table
const TableEntry * table
Definition: AMDGPULibCalls.cpp:415

llvm::CallBase::getAttributes
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1244

tbl_log
static const TableEntry tbl_log[]
Definition: AMDGPULibCalls.cpp:338

llvm::AMDGPULibFuncBase::EI_POWR
Definition: AMDGPULibFunc.h:152

llvm::AMDGPULibFuncBase::EI_SIN
Definition: AMDGPULibFunc.h:171

llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133

llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:164

llvm::VectorType
Class to represent vector types.
Definition: DerivedTypes.h:393

llvm::AMDGPULibFunc::getOrInsertFunction
static Function * getOrInsertFunction(llvm::Module *M, const AMDGPULibFunc &fInfo)
Definition: AMDGPULibFunc.cpp:964

llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:56

tbl_cosh
static const TableEntry tbl_cosh[]
Definition: AMDGPULibCalls.cpp:303

llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:400

llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90

llvm::IRBuilder::CreateShl
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1103

llvm::StringRef::equals
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:169

llvm::IRBuilder::CreatePointerCast
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1778

llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:142

Function.h

Arg
amdgpu Simplify well known AMD library false Value Value * Arg
Definition: AMDGPULibCalls.cpp:220

llvm::DataLayout::getTypeAllocSize
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:436

TableRef::size
size_t size
Definition: AMDGPULibCalls.cpp:414

AliasAnalysis.h

llvm::Value::getNumUses
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:160

llvm::CallBase::getNumArgOperands
unsigned getNumArgOperands() const
Definition: InstrTypes.h:1133

llvm::cl::opt
Definition: CommandLine.h:1300

tbl_exp
static const TableEntry tbl_exp[]
Definition: AMDGPULibCalls.cpp:319

llvm::AMDGPULibFuncBase::EI_FMA
Definition: AMDGPULibFunc.h:95

llvm::AMDGPULibFuncBase::EI_TAN
Definition: AMDGPULibFunc.h:193

llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:195

tbl_tanh
static const TableEntry tbl_tanh[]
Definition: AMDGPULibCalls.cpp:375

llvm::ConstantFP::isZero
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:306

llvm::AMDGPULibFuncBase::EI_NSIN
Definition: AMDGPULibFunc.h:228

llvm::ConstantDataSequential::getElementAsDouble
double getElementAsDouble(unsigned i) const
If this is a sequential container of doubles, return the specified element as a double.
Definition: Constants.cpp:2755

llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214

llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1181

llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107

llvm::AMDGPULibFuncBase::EI_EXP10
Definition: AMDGPULibFunc.h:86

I
#define I(x, y, z)
Definition: MD5.cpp:58

getOptTable
static TableRef getOptTable(AMDGPULibFunc::EFuncId id)
Definition: AMDGPULibCalls.cpp:423

N
#define N

MATH_PI
#define MATH_PI
Definition: AMDGPULibCalls.cpp:49

llvm::createAMDGPUSimplifyLibCallsPass
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetOptions &)
Definition: AMDGPULibCalls.cpp:1682

llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1213

tbl_asin
static const TableEntry tbl_asin[]
Definition: AMDGPULibCalls.cpp:261

llvm::IRBuilder::CreateFPToSI
Value * CreateFPToSI(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1701

llvm::CallBase::setAttributes
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1248

llvm::AMDGPULibFuncBase::Param::ArgType
unsigned char ArgType
Definition: AMDGPULibFunc.h:292

UseNative
static cl::list< std::string > UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)

llvm::dyn_cast
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323

Size
uint32_t Size
Definition: Profile.cpp:47

llvm::IRBuilder::CreateCall
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1974

llvm::AMDGPULibFuncBase::EI_ACOSPI
Definition: AMDGPULibFunc.h:41

MATH_SQRT2
#define MATH_SQRT2
Definition: AMDGPULibCalls.cpp:51

llvm::AMDGPULibFuncBase::EI_NEXP2
Definition: AMDGPULibFunc.h:223

tbl_tan
static const TableEntry tbl_tan[]
Definition: AMDGPULibCalls.cpp:371

tbl_cos
static const TableEntry tbl_cos[]
Definition: AMDGPULibCalls.cpp:299

llvm::AMDGPULibFuncBase::EI_ATANPI
Definition: AMDGPULibFunc.h:54

llvm::IRBuilder::CreateFAdd
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1213

llvm::IRBuilder::CreateAnd
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1164

HasNative
static bool HasNative(AMDGPULibFunc::EFuncId id)
Definition: AMDGPULibCalls.cpp:390

IRBuilder.h

llvm::IRBuilder::CreateFDiv
Value * CreateFDiv(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1264

llvm::to_string
const std::string to_string(const T &Value)
Definition: ScopedPrinter.h:62

llvm::AMDGPULibFuncBase::EI_ASIN
Definition: AMDGPULibFunc.h:45

LLVMContext.h

llvm::AMDGPULibFuncBase::EI_NSQRT
Definition: AMDGPULibFunc.h:229

llvm::AMDGPULibCalls::replaceCall
void replaceCall(Value *With)
Definition: AMDGPULibCalls.cpp:150

llvm::Function::addAttributes
void addAttributes(unsigned i, const AttrBuilder &Attrs)
adds the attributes to the list of attributes.
Definition: Function.cpp:380

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

llvm::createAMDGPUUseNativeCallsPass
FunctionPass * createAMDGPUUseNativeCallsPass()
Definition: AMDGPULibCalls.cpp:1686

const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232

llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566

llvm::Value
LLVM Value Representation.
Definition: Value.h:73

TableRef
Definition: AMDGPULibCalls.cpp:413

calls
amdgpu Simplify well known AMD library calls
Definition: AMDGPULibCalls.cpp:212

llvm::VectorType::get
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:606

llvm::AMDGPULibFuncBase::F32
Definition: AMDGPULibFunc.h:271

tbl_erfc
static const TableEntry tbl_erfc[]
Definition: AMDGPULibCalls.cpp:311

llvm::IRBuilderBase::setFastMathFlags
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:220

llvm::IRBuilderBase::GetInsertPoint
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:122

llvm::Function::getFnAttribute
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:331

llvm::AMDGPULibFunc::mangle
std::string mangle() const
Definition: AMDGPULibFunc.h:387

llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:242

llvm::SequentialType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:360

llvm::ConstantFP::isExactlyValue
bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: Constants.cpp:824

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49

raw_ostream.h

llvm::ConstantDataSequential::getNumElements
unsigned getNumElements() const
Return the number of elements in the array or vector.
Definition: Constants.cpp:2444

llvm::AMDGPULibFuncBase::EI_COS
Definition: AMDGPULibFunc.h:74

llvm::AMDGPULibFuncBase::EI_DIVIDE
Definition: AMDGPULibFunc.h:81

llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
Definition: AliasAnalysis.h:1040

llvm::AMDGPULibFuncBase::EI_ACOS
Definition: AMDGPULibFunc.h:39

llvm::AMDGPULibFuncBase::EI_FABS
Definition: AMDGPULibFunc.h:89

llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:157

llvm::ConstantDataVector::get
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
Definition: Constants.cpp:2583

llvm::AMDGPULibFuncBase::EI_LOG2
Definition: AMDGPULibFunc.h:132

LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:123

llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:150

AMDGPULibFunc.h

tbl_tgamma
static const TableEntry tbl_tgamma[]
Definition: AMDGPULibCalls.cpp:383

llvm::AMDGPULibFuncBase::EI_READ_PIPE_4
Definition: AMDGPULibFunc.h:239

llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:374

llvm::AMDGPULibCalls::useNative
bool useNative(CallInst *CI)
Definition: AMDGPULibCalls.cpp:542

tbl_acos
static const TableEntry tbl_acos[]
Definition: AMDGPULibCalls.cpp:246

llvm::AMDGPULibFuncBase::NOPFX
Definition: AMDGPULibFunc.h:247

llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:67

llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:60

MATH_E
#define MATH_E
Definition: AMDGPULibCalls.cpp:50

llvm::AMDGPULibFuncBase::EI_WRITE_PIPE_2
Definition: AMDGPULibFunc.h:240