62 #define DEBUG_TYPE "mergeicmps" 66 if (
const LoadInst *LI = dyn_cast<LoadInst>(I))
67 return LI->isSimple();
69 return SI->isSimple();
79 :
GEP(GEP), LoadI(LoadI), BaseId(BaseId),
Offset(Offset) {}
92 return BaseId != O.BaseId ? BaseId < O.BaseId : Offset.
slt(O.Offset);
103 class BaseIdentifier {
108 assert(Base &&
"invalid base");
109 const auto Insertion = BaseToIndex.try_emplace(Base, Order);
110 if (Insertion.second)
112 return Insertion.first->second;
123 BCEAtom visitICmpLoadOperand(
Value *
const Val, BaseIdentifier &BaseId) {
128 if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
133 if (!LoadI->isSimple()) {
137 Value *
const Addr = LoadI->getOperand(0);
142 if (
GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
146 const auto &DL =
GEP->getModule()->getDataLayout();
154 if (!
GEP->accumulateConstantOffset(DL, Offset))
156 return BCEAtom(
GEP, LoadI, BaseId.getBaseId(
GEP->getPointerOperand()),
174 BCECmpBlock(BCEAtom L, BCEAtom R,
int SizeBits)
175 : Lhs_(L), Rhs_(R), SizeBits_(SizeBits) {
179 bool IsValid()
const {
return Lhs_.BaseId != 0 && Rhs_.BaseId != 0; }
183 void AssertConsistent()
const {
191 const BCEAtom &Lhs()
const {
return Lhs_; }
192 const BCEAtom &Rhs()
const {
return Rhs_; }
193 int SizeBits()
const {
return SizeBits_; }
196 bool doesOtherWork()
const;
221 bool RequireSplit =
false;
229 bool BCECmpBlock::canSinkBCECmpInst(
const Instruction *Inst,
236 if (!isSimpleLoadOrStore(Inst))
247 for (
auto BI : BlockInsts) {
256 {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
259 if (BlockInsts.
count(&Inst))
261 assert(canSinkBCECmpInst(&Inst, BlockInsts, AA) &&
262 "Split unsplittable block");
265 OtherInsts.push_back(&Inst);
276 {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
278 if (!BlockInsts.
count(&Inst)) {
279 if (!canSinkBCECmpInst(&Inst, BlockInsts, AA))
286 bool BCECmpBlock::doesOtherWork()
const {
290 {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
296 if (!BlockInsts.
count(&Inst))
304 BCECmpBlock visitICmp(
const ICmpInst *
const CmpI,
306 BaseIdentifier &BaseId) {
321 auto Lhs = visitICmpLoadOperand(CmpI->
getOperand(0), BaseId);
324 auto Rhs = visitICmpLoadOperand(CmpI->
getOperand(1), BaseId);
328 return BCECmpBlock(std::move(Lhs), std::move(Rhs),
336 BaseIdentifier &BaseId) {
337 if (Block->
empty())
return {};
339 if (!BranchI)
return {};
341 if (BranchI->isUnconditional()) {
347 if (!CmpI)
return {};
351 Result.BranchI = BranchI;
358 if (!Const->isZero())
return {};
361 if (!CmpI)
return {};
363 assert(BranchI->getNumSuccessors() == 2 &&
"expecting a cond branch");
364 BasicBlock *
const FalseBlock = BranchI->getSuccessor(1);
365 auto Result = visitICmp(
369 Result.BranchI = BranchI;
375 static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
376 BCECmpBlock &Comparison) {
378 <<
"': Found cmp of " << Comparison.SizeBits()
379 <<
" bits between " << Comparison.Lhs().BaseId <<
" + " 380 << Comparison.Lhs().Offset <<
" and " 381 << Comparison.Rhs().BaseId <<
" + " 382 << Comparison.Rhs().Offset <<
"\n");
384 Comparisons.push_back(Comparison);
390 BCECmpChain(
const std::vector<BasicBlock *> &Blocks,
PHINode &Phi,
393 int size()
const {
return Comparisons_.size(); }
395 #ifdef MERGEICMPS_DOT_ON 397 #endif // MERGEICMPS_DOT_ON 402 static bool IsContiguous(
const BCECmpBlock &First,
403 const BCECmpBlock &Second) {
404 return First.Lhs().BaseId == Second.Lhs().BaseId &&
405 First.Rhs().BaseId == Second.Rhs().BaseId &&
406 First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
407 First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
418 std::vector<BCECmpBlock> Comparisons_;
423 BCECmpChain::BCECmpChain(
const std::vector<BasicBlock *> &Blocks,
PHINode &Phi,
426 assert(!Blocks.empty() &&
"a chain should have at least one block");
428 std::vector<BCECmpBlock> Comparisons;
429 BaseIdentifier BaseId;
430 for (
size_t BlockIdx = 0; BlockIdx < Blocks.size(); ++BlockIdx) {
432 assert(Block &&
"invalid block");
435 Comparison.BB = Block;
436 if (!Comparison.IsValid()) {
437 LLVM_DEBUG(
dbgs() <<
"chain with invalid BCECmpBlock, no merge.\n");
440 if (Comparison.doesOtherWork()) {
442 <<
"' does extra work besides compare\n");
443 if (Comparisons.empty()) {
457 if (Comparison.canSplit(AA)) {
459 <<
"Split initial block '" << Comparison.BB->getName()
460 <<
"' that does extra work besides compare\n");
461 Comparison.RequireSplit =
true;
462 enqueueBlock(Comparisons, Comparison);
465 <<
"ignoring initial block '" << Comparison.BB->getName()
466 <<
"' that does extra work besides compare\n");
495 enqueueBlock(Comparisons, Comparison);
499 if (Comparisons.empty()) {
500 LLVM_DEBUG(
dbgs() <<
"chain with no BCE basic blocks, no merge\n");
503 EntryBlock_ = Comparisons[0].BB;
504 Comparisons_ = std::move(Comparisons);
505 #ifdef MERGEICMPS_DOT_ON 506 errs() <<
"BEFORE REORDERING:\n\n";
508 #endif // MERGEICMPS_DOT_ON 512 [](
const BCECmpBlock &LhsBlock,
const BCECmpBlock &RhsBlock) {
513 return LhsBlock.Lhs() < RhsBlock.Lhs();
515 #ifdef MERGEICMPS_DOT_ON 516 errs() <<
"AFTER REORDERING:\n\n";
518 #endif // MERGEICMPS_DOT_ON 521 #ifdef MERGEICMPS_DOT_ON 523 errs() <<
"digraph dag {\n";
524 errs() <<
" graph [bgcolor=transparent];\n";
525 errs() <<
" node [color=black,style=filled,fillcolor=lightyellow];\n";
526 errs() <<
" edge [color=black];\n";
527 for (
size_t I = 0;
I < Comparisons_.size(); ++
I) {
528 const auto &Comparison = Comparisons_[
I];
529 errs() <<
" \"" <<
I <<
"\" [label=\"%" 530 << Comparison.Lhs().Base()->getName() <<
" + " 531 << Comparison.Lhs().Offset <<
" == %" 532 << Comparison.Rhs().Base()->getName() <<
" + " 533 << Comparison.Rhs().Offset <<
" (" << (Comparison.SizeBits() / 8)
535 const Value *
const Val = Phi_.getIncomingValueForBlock(Comparison.BB);
536 if (
I > 0)
errs() <<
" \"" << (
I - 1) <<
"\" -> \"" <<
I <<
"\";\n";
537 errs() <<
" \"" <<
I <<
"\" -> \"Phi\" [label=\"" << *Val <<
"\"];\n";
539 errs() <<
" \"Phi\" [label=\"Phi\"];\n";
542 #endif // MERGEICMPS_DOT_ON 549 bool AtLeastOneMerged =
false;
550 for (
size_t I = 1;
I < Comparisons_.size(); ++
I) {
551 if (IsContiguous(Comparisons_[
I - 1], Comparisons_[
I])) {
552 AtLeastOneMerged =
true;
556 if (!AtLeastOneMerged)
return false;
561 for (
const auto &Comparison : Comparisons_) {
562 Phi_.removeIncomingValue(Comparison.BB,
false);
568 for (
size_t I = 1;
I < Comparisons_.size(); ++
I) {
569 if (Entry == Comparisons_[
I].BB) {
579 if (EntryBlock_ != Comparisons_[0].BB) {
580 EntryBlock_->replaceAllUsesWith(Comparisons_[0].BB);
581 EntryBlock_ = Comparisons_[0].BB;
586 for (
size_t I = 1;
I < Comparisons_.size(); ++
I) {
587 if (IsContiguous(Comparisons_[
I - 1], Comparisons_[
I])) {
592 makeArrayRef(Comparisons_).slice(I - NumMerged, NumMerged),
593 Comparisons_[I].BB, Phi_, TLI, AA);
598 .slice(Comparisons_.size() - NumMerged, NumMerged),
599 nullptr, Phi_, TLI, AA);
610 const auto &FirstComparison = *Comparisons.
begin();
614 if (Comparisons.
size() >= 2) {
619 [](
const BCECmpBlock &
B) {
return B.RequireSplit; });
620 if (
C != Comparisons.
end())
621 C->split(EntryBlock_, AA);
624 const auto TotalSize =
625 std::accumulate(Comparisons.
begin(), Comparisons.
end(), 0,
626 [](
int Size,
const BCECmpBlock &
C) {
627 return Size +
C.SizeBits();
636 FirstComparison.BranchI->eraseFromParent();
637 FirstComparison.CmpI->eraseFromParent();
638 FirstComparison.Lhs().LoadI->eraseFromParent();
639 FirstComparison.Rhs().LoadI->eraseFromParent();
644 FirstComparison.Lhs().GEP, FirstComparison.Rhs().GEP,
660 for (
size_t I = 1;
I < Comparisons.
size(); ++
I) {
670 if (FirstComparison.BranchI->isConditional()) {
675 FirstComparison.BranchI->setSuccessor(0, NextBBInChain);
680 FirstComparison.BranchI->eraseFromParent();
682 Builder.
CreateCondBr(FirstComparison.CmpI, NextBBInChain,
687 if (FirstComparison.BranchI->isConditional()) {
690 FirstComparison.BranchI->eraseFromParent();
702 std::vector<BasicBlock *> getOrderedBlocks(
PHINode &Phi,
706 std::vector<BasicBlock *> Blocks(NumBlocks);
707 assert(LastBlock &&
"invalid last block");
709 for (
int BlockIndex = NumBlocks - 1; BlockIndex > 0; --BlockIndex) {
714 <<
" has its address taken\n");
717 Blocks[BlockIndex] = CurBlock;
719 if (!SinglePredecessor) {
722 <<
" has two or more predecessors\n");
728 <<
" does not link back to the phi\n");
731 CurBlock = SinglePredecessor;
733 Blocks[0] = CurBlock;
778 <<
"skip: non-constant value not from cmp or not from last block.\n");
795 if (Blocks.empty())
return false;
796 BCECmpChain CmpChain(Blocks, Phi, AA);
798 if (CmpChain.size() < 2) {
803 return CmpChain.simplify(TLI, AA);
815 if (skipFunction(F))
return false;
816 const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
817 const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
818 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
819 auto PA =
runImpl(F, &TLI, &TTI, AA);
820 return !PA.areAllPreserved();
844 if (!TLI->
has(LibFunc_memcmp))
847 bool MadeChange =
false;
849 for (
auto BBIt = ++F.
begin(); BBIt != F.
end(); ++BBIt) {
851 if (
auto *
const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
852 MadeChange |= processPhi(*Phi, TLI, AA);
863 "Merge contiguous icmps into a memcmp",
false,
false)
Pass interface - Implemented by all 'passes'.
static ConstantInt * getFalse(LLVMContext &Context)
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
static bool runImpl(Function &F, TargetLibraryInfo &TLI, DominatorTree &DT)
This is the entry point for all transforms.
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
This class represents lattice values for constants.
Implements a dense probed hash-table based set.
bool slt(const APInt &RHS) const
Signed less than comparison.
static void dump(StringRef Title, SpillInfo const &Spills)
static std::pair< StringRef, StringRef > split(StringRef Str, char Separator)
Checked version of split, to ensure mandatory subparts.
An instruction for reading from memory.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
LLVMContext & getContext() const
Get the context in which this basic block lives.
iterator begin()
Instruction iterator methods.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Pass * createMergeICmpsPass()
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Type * getType() const
All values are typed, get the type of this value.
bool has(LibFunc F) const
Tests whether a library function is available.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
An instruction for storing to memory.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Value * getOperand(unsigned i) const
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
const BasicBlock & getEntryBlock() const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
static bool runOnFunction(Function &F, bool PostInlining)
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
A set of analyses that are preserved following a run of a transformation pass.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
LLVM Basic Block Representation.
This is an important class for using LLVM in a threaded context.
Conditional or Unconditional Branch instruction.
size_t size() const
size - Get the array size.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Represent the analysis usage information of a pass.
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
FunctionPass class - This class is used to implement most global optimizations.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void sort(IteratorTy Start, IteratorTy End)
Representation for a specific memory location.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches, switches, etc.
This is the shared class of boolean and integer constants.
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Provides information about what library functions are available for the current target.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Class for arbitrary precision integers.
bool isDereferenceablePointer(const Value *V, const DataLayout &DL, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if this is always a dereferenceable pointer.
LLVM_NODISCARD bool isModSet(const ModRefInfo MRI)
Predicate getPredicate() const
Return the predicate for this instruction.
Merge contiguous icmps into a memcmp
static IntegerType * getInt32Ty(LLVMContext &C)
StringRef getName() const
Return a constant reference to the value's name.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Value * emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI)
Emit a call to the memcmp function.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool operator<(int64_t V1, const APSInt &V2)
LLVM Value Representation.
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
static const Function * getParent(const Value *V)
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool hasOneUse() const
Return true if there is exactly one user of this value.
INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps", "Merge contiguous icmps into a memcmp", false, false) INITIALIZE_PASS_END(MergeICmps
void initializeMergeICmpsPass(PassRegistry &)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
bool empty() const
empty - Check if the array is empty.
const BasicBlock * getParent() const
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.