43 #define DEBUG_TYPE "bypass-slow-division" 51 QuotRemPair(
Value *InQuotient,
Value *InRemainder)
52 : Quotient(InQuotient), Remainder(InRemainder) {}
58 struct QuotRemWithBB {
60 Value *Quotient =
nullptr;
61 Value *Remainder =
nullptr;
78 class FastDivInsertionTask {
79 bool IsValidTask =
false;
84 bool isHashLikeValue(
Value *V, VisitedSetTy &Visited);
87 QuotRemWithBB createFastBB(
BasicBlock *Successor);
88 QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
94 return SlowDivOrRem->
getOpcode() == Instruction::SDiv ||
95 SlowDivOrRem->
getOpcode() == Instruction::SRem;
99 return SlowDivOrRem->
getOpcode() == Instruction::SDiv ||
100 SlowDivOrRem->
getOpcode() == Instruction::UDiv;
103 Type *getSlowType() {
return SlowDivOrRem->
getType(); }
106 FastDivInsertionTask(
Instruction *
I,
const BypassWidthsTy &BypassWidths);
108 Value *getReplacement(DivCacheTy &Cache);
113 FastDivInsertionTask::FastDivInsertionTask(
Instruction *
I,
114 const BypassWidthsTy &BypassWidths) {
116 case Instruction::UDiv:
117 case Instruction::SDiv:
118 case Instruction::URem:
119 case Instruction::SRem:
133 auto BI = BypassWidths.find(SlowType->
getBitWidth());
134 if (BI == BypassWidths.end())
152 Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
158 Value *Dividend = SlowDivOrRem->getOperand(0);
159 Value *Divisor = SlowDivOrRem->getOperand(1);
161 auto CacheI = Cache.find(Key);
163 if (CacheI == Cache.end()) {
169 CacheI = Cache.insert({
Key, *OptResult}).
first;
172 QuotRemPair &
Value = CacheI->second;
173 return isDivisionOp() ? Value.Quotient : Value.Remainder;
191 bool FastDivInsertionTask::isHashLikeValue(
Value *V, VisitedSetTy &Visited) {
197 case Instruction::Xor:
199 case Instruction::Mul: {
206 if (!C && isa<BitCastInst>(Op1))
207 C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
210 case Instruction::PHI:
213 if (Visited.size() >= 16)
217 if (Visited.find(I) != Visited.end())
223 return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
233 VisitedSetTy &Visited) {
234 unsigned ShortLen = BypassType->getBitWidth();
237 assert(LongLen > ShortLen &&
"Value type must be wider than BypassType");
238 unsigned HiBits = LongLen - ShortLen;
240 const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
246 return VALRNG_KNOWN_SHORT;
249 return VALRNG_LIKELY_LONG;
255 if (isHashLikeValue(V, Visited))
256 return VALRNG_LIKELY_LONG;
258 return VALRNG_UNKNOWN;
263 QuotRemWithBB FastDivInsertionTask::createSlowBB(
BasicBlock *SuccessorBB) {
264 QuotRemWithBB DivRemPair;
266 MainBB->getParent(), SuccessorBB);
267 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
269 Value *Dividend = SlowDivOrRem->getOperand(0);
270 Value *Divisor = SlowDivOrRem->getOperand(1);
273 DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
274 DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
276 DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
277 DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
280 Builder.CreateBr(SuccessorBB);
286 QuotRemWithBB FastDivInsertionTask::createFastBB(
BasicBlock *SuccessorBB) {
287 QuotRemWithBB DivRemPair;
289 MainBB->getParent(), SuccessorBB);
290 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
292 Value *Dividend = SlowDivOrRem->getOperand(0);
293 Value *Divisor = SlowDivOrRem->getOperand(1);
294 Value *ShortDivisorV =
295 Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
296 Value *ShortDividendV =
297 Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
300 Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
301 Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
302 DivRemPair.Quotient =
303 Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
304 DivRemPair.Remainder =
305 Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
306 Builder.CreateBr(SuccessorBB);
312 QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
316 PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
317 QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
318 QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
319 PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
322 return QuotRemPair(QuoPhi, RemPhi);
329 Value *FastDivInsertionTask::insertOperandRuntimeCheck(
Value *Op1,
Value *Op2) {
330 assert((Op1 || Op2) &&
"Nothing to check");
337 OrV = Op1 ? Op1 : Op2;
341 uint64_t BitMask = ~BypassType->getBitMask();
342 Value *AndV = Builder.CreateAnd(OrV, BitMask);
346 return Builder.CreateICmpEQ(AndV, ZeroV);
352 Value *Dividend = SlowDivOrRem->getOperand(0);
353 Value *Divisor = SlowDivOrRem->getOperand(1);
356 ValueRange DividendRange = getValueRange(Dividend, SetL);
357 if (DividendRange == VALRNG_LIKELY_LONG)
361 ValueRange DivisorRange = getValueRange(Divisor, SetR);
362 if (DivisorRange == VALRNG_LIKELY_LONG)
365 bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
366 bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
368 if (DividendShort && DivisorShort) {
381 return QuotRemPair(ExtDiv, ExtRem);
384 if (isa<ConstantInt>(Divisor)) {
395 if (
auto *BCI = dyn_cast<BitCastInst>(Divisor))
396 if (BCI->getParent() == SlowDivOrRem->getParent() &&
397 isa<ConstantInt>(BCI->getOperand(0)))
419 Long.Remainder = Dividend;
420 QuotRemWithBB
Fast = createFastBB(SuccessorBB);
421 QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
423 Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
424 Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
434 QuotRemWithBB
Fast = createFastBB(SuccessorBB);
435 QuotRemWithBB Slow = createSlowBB(SuccessorBB);
436 QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
437 Value *CmpV = insertOperandRuntimeCheck(DividendShort ?
nullptr : Dividend,
438 DivisorShort ?
nullptr : Divisor);
448 const BypassWidthsTy &BypassWidths) {
449 DivCacheTy PerBBDivCache;
451 bool MadeChange =
false;
453 while (Next !=
nullptr) {
459 FastDivInsertionTask Task(I, BypassWidths);
460 if (
Value *Replacement = Task.getReplacement(PerBBDivCache)) {
470 for (
auto &KV : PerBBDivCache)
471 for (
Value *V : {KV.second.Quotient, KV.second.Remainder})
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
A parsed version of the target data layout string in and methods for querying it. ...
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
This class represents lattice values for constants.
LLVMContext & getContext() const
All values hold a context through their type.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
iterator begin()
Instruction iterator methods.
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Type * getType() const
All values are typed, get the type of this value.
const APInt & getValue() const
Return the constant as an APInt value reference.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Value * getOperand(unsigned i) const
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
LLVM Basic Block Representation.
The instances of the Type class are immutable: once they are created, they are never changed...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Class to represent integer types.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
const InstListType & getInstList() const
Return the underlying instruction list container.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
This is the shared class of boolean and integer constants.
Module.h This file contains the declarations for the Module class.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Value * CreateURem(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateUDiv(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
unsigned getIntegerBitWidth() const
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
static int isSignedOp(ISD::CondCode Opcode)
For an integer comparison, return 1 if the comparison is a signed operation and 2 if the result is an...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getMinSignedBits() const
Get the minimum bit size for this signed APInt.
LLVM Value Representation.
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
const BasicBlock * getParent() const