#include "llvm/Config/llvm-config.h"

using namespace llvm;

#define DEBUG_TYPE "loop-reduce"

    cl::desc("Enable LSR phi elimination"));

    cl::desc("Add instruction count to an LSR cost model"));

    cl::desc("Narrow LSR complex solution using"
             " the expected number of registers"));

    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

    cl::desc("LSR search space complexity limit"));

    cl::desc("Stress test LSR IV chains"));
  Type *MemTy = nullptr;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;

  bool operator!=(MemAccessTy Other) const { return !(*this == Other); }

                                   unsigned AS = UnknownAddressSpace) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "[NumUses=" << UsedByIndices.count() << ']';
class RegUseTracker {
  RegUsesTy RegUsesMap;

  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;

  iterator begin() { return RegSequence.begin(); }
  iterator end()   { return RegSequence.end(); }
  const_iterator begin() const { return RegSequence.begin(); }
  const_iterator end() const   { return RegSequence.end(); }
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair =
      RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
  RegSortData &RSD = Pair.first->second;

  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);
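// Illustrative note (not in the original source): UsedByIndices keeps one bit
// per LSRUse index. E.g. after countRegister(R, 0) and countRegister(R, 3),
// R's bit vector is at least 4 bits long with bits 0 and 3 set.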
RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  assert(RSD.UsedByIndices.size() > LUIdx);
  RSD.UsedByIndices.reset(LUIdx);
RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  for (auto &Pair : RegUsesMap) {
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
          LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
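// Illustrative note (an assumption about intent, not from the source): this
// mirrors the swap-and-pop idiom used when the use at LastLUIdx is moved into
// slot LUIdx. E.g. with bits {LUIdx=1: 0, LastLUIdx=4: 1}, bit 1 becomes 1
// and the vector is truncated so index 4 no longer exists.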
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())

  if (i == -1) return false;
  if ((size_t)i != LUIdx) return true;

  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;
  int64_t BaseOffset = 0;

  bool HasBaseReg = false;

  const SCEV *ScaledReg = nullptr;

  int64_t UnfoldedOffset = 0;

  bool hasZeroEnd() const;

  size_t getNumRegs() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;
    for (const SCEV *S : Add->operands())

  if (!AR->getStart()->isZero() && AR->isAffine()) {
                    AR->getStepRecurrence(SE),

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
    if (Mul->getOperand(0)->isAllOnesValue()) {

  for (const SCEV *S : MyGood)
  for (const SCEV *S : MyBad)

    BaseRegs.push_back(Sum);

    BaseRegs.push_back(Sum);

    return BaseRegs.size() <= 1;

  if (Scale == 1 && BaseRegs.empty())

  if (SAR && SAR->getLoop() == &L)

    return isa<const SCEVAddRecExpr>(S) &&
           (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
  return I == BaseRegs.end();

  assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");

    ScaledReg = BaseRegs.back();

  if (!SAR || SAR->getLoop() != &L) {
      return isa<const SCEVAddRecExpr>(S) &&
             (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
    if (I != BaseRegs.end())
bool Formula::unscale() {
  BaseRegs.push_back(ScaledReg);

bool Formula::hasZeroEnd() const {
  if (UnfoldedOffset || BaseOffset)
  if (BaseRegs.size() != 1 || ScaledReg)

size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();
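// Illustrative note (not in the original source): getNumRegs() counts the
// scaled register as one more register, e.g. a formula with two base regs
// and a non-null ScaledReg reports 1 + 2 == 3.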
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         BaseGV ? BaseGV->getType() :

void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())

bool Formula::referencesReg(const SCEV *S) const {

bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
  if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  if (!First) OS << " + "; else First = false;
  BaseGV->printAsOperand(OS, false);

  if (BaseOffset != 0) {
    if (!First) OS << " + "; else First = false;

  for (const SCEV *BaseReg : BaseRegs) {
    if (!First) OS << " + "; else First = false;
    OS << "reg(" << *BaseReg << ')';

  if (HasBaseReg && BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: !HasBaseReg**";

    if (!First) OS << " + "; else First = false;
    OS << Scale << "*reg(";

  if (UnfoldedOffset != 0) {
    if (!First) OS << " + ";
    OS << "imm(" << UnfoldedOffset << ')';
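// Illustrative note (an assumption about the dump format implied above): a
// formula might print as "reg(%base) + 4*reg({0,+,1}<%loop>) + imm(8)", with
// components joined by " + " and inconsistencies flagged inline as **error**.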
                                bool IgnoreSignificantBits = false) {

      const APInt &LA = C->getAPInt();

      if (LA.srem(RA) != 0)

    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
                                  IgnoreSignificantBits);
      if (!Step) return nullptr;
                                   IgnoreSignificantBits);
      if (!Start) return nullptr;

      for (const SCEV *S : Add->operands()) {
        if (!Op) return nullptr;

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
      for (const SCEV *S : Mul->operands()) {
                               IgnoreSignificantBits)) {

    if (C->getAPInt().getMinSignedBits() <= 64) {
      return C->getValue()->getSExtValue();
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {

  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
  bool isAddress = isa<LoadInst>(Inst);

    if (SI->getPointerOperand() == OperandVal)

  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
      if (II->getArgOperand(0) == OperandVal)
      if (II->getArgOperand(0) == OperandVal ||
          II->getArgOperand(1) == OperandVal)
        if (IntrInfo.PtrVal == OperandVal)
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    if (RMW->getPointerOperand() == OperandVal)
    if (CmpX->getPointerOperand() == OperandVal)
  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    AccessTy.MemTy = SI->getOperand(0)->getType();
    AccessTy.AddrSpace = SI->getPointerAddressSpace();
  } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    AccessTy.AddrSpace = LI->getPointerAddressSpace();
  } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    AccessTy.AddrSpace = RMW->getPointerAddressSpace();
    AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
      AccessTy.MemTy = OperandVal->getType();

      AccessTy.MemTy = OperandVal->getType();

  if (PointerType *PTy = dyn_cast<PointerType>(AccessTy.MemTy))
                           PTy->getAddressSpace());
  if (!Processed.insert(S).second)

    for (const SCEV *S : Add->operands()) {

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    if (Mul->getNumOperands() == 2) {
      if (isa<SCEVConstant>(Mul->getOperand(0)))

      if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
        Value *UVal = U->getValue();
        if (UI && UI->getOpcode() == Instruction::Mul &&

  bool Changed = false;

  while (!DeadInsts.empty()) {
                   const LSRUse &LU, const Formula &F);
                   const LSRUse &LU, const Formula &F,

    assert(isValid() && "invalid cost");

  void RateRegister(const SCEV *Reg,
  void RatePrimaryRegister(const SCEV *Reg,
  Value *OperandValToReplace = nullptr;

  LSRFixup() = default;

  bool isUseFullyOutsideLoop(const Loop *L) const;

struct UniquifierDenseMapInfo {
    V.push_back(reinterpret_cast<const SCEV *>(-1));
    V.push_back(reinterpret_cast<const SCEV *>(-2));
  MemAccessTy AccessTy;

  int64_t MaxOffset = std::numeric_limits<int64_t>::min();

  bool AllFixupsOutsideLoop = true;

  bool RigidFormula = false;

  Type *WidestFixupType = nullptr;

  LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}

  LSRFixup &getNewFixup() {
    return Fixups.back();

  void pushFixup(LSRFixup &f) {
    if (f.Offset > MaxOffset)
      MaxOffset = f.Offset;
    if (f.Offset < MinOffset)
      MinOffset = f.Offset;

  bool HasFormulaWithSameRegs(const Formula &F) const;
  float getNotSelectedProbability(const SCEV *Reg) const;
  bool InsertFormula(const Formula &F, const Loop &L);
  void DeleteFormula(Formula &F);
  void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);

                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale,
void Cost::RateRegister(const SCEV *Reg,
    if (AR->getLoop() != L) {
      if (!AR->getLoop()->contains(L)) {

      unsigned LoopCost = 1;
        const SCEV *LoopStep = AR->getStepRecurrence(SE);
        if (isa<SCEVConstant>(LoopStep)) {
          const SCEV *LoopStart = AR->getStart();
          if (!isa<SCEVConstant>(LoopStart) &&
      C.AddRecCost += LoopCost;

    if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
      if (!Regs.count(AR->getOperand(1))) {
        RateRegister(AR->getOperand(1), Regs, L, SE, DT, TTI);

  if (!isa<SCEVUnknown>(Reg) &&
      !isa<SCEVConstant>(Reg) &&
      !(isa<SCEVAddRecExpr>(Reg) &&
        (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
         isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))

  C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
void Cost::RatePrimaryRegister(const SCEV *Reg,
  if (LoserRegs && LoserRegs->count(Reg)) {
  if (Regs.insert(Reg).second) {
    RateRegister(Reg, Regs, L, SE, DT, TTI);
    if (LoserRegs && isLoser())
  assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");

  unsigned PrevAddRecCost = C.AddRecCost;
  unsigned PrevNumRegs = C.NumRegs;
  unsigned PrevNumBaseAdds = C.NumBaseAdds;
  if (const SCEV *ScaledReg = F.ScaledReg) {
    if (VisitedRegs.count(ScaledReg)) {
    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs, TTI);
  for (const SCEV *BaseReg : F.BaseRegs) {
    if (VisitedRegs.count(BaseReg)) {
    RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs, TTI);

  size_t NumBaseParts = F.getNumRegs();
  if (NumBaseParts > 1)

  C.NumBaseAdds += (F.UnfoldedOffset != 0);

  for (const LSRFixup &Fixup : LU.Fixups) {
    int64_t O = Fixup.Offset;
    int64_t Offset = (uint64_t)O + F.BaseOffset;
    else if (Offset != 0)
                           Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))

  assert(isValid() && "invalid cost");

  if (C.NumRegs > TTIRegNum) {
    if (PrevNumRegs > TTIRegNum)
      C.Insns += (C.NumRegs - PrevNumRegs);

      C.Insns += (C.NumRegs - TTIRegNum);

  if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() && !TTI.canMacroFuseCmp())

  C.Insns += (C.AddRecCost - PrevAddRecCost);

  if (LU.Kind != LSRUse::ICmpZero)
    C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
  assert(isValid() && "invalid cost");
      C.Insns != Other.C.Insns)
    return C.Insns < Other.C.Insns;
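// Illustrative note (not in the original source): instruction count dominates
// the comparison above. E.g. a solution rated {Insns: 3, NumRegs: 5} beats
// {Insns: 4, NumRegs: 2}, because the Insns values differ and 3 < 4.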
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
  OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
  if (C.AddRecCost != 0)
    OS << ", with addrec cost " << C.AddRecCost;
  if (C.NumIVMuls != 0)
    OS << ", plus " << C.NumIVMuls << " IV mul"
       << (C.NumIVMuls == 1 ? "" : "s");
  if (C.NumBaseAdds != 0)
    OS << ", plus " << C.NumBaseAdds << " base add"
       << (C.NumBaseAdds == 1 ? "" : "s");
  if (C.ScaleCost != 0)
    OS << ", plus " << C.ScaleCost << " scale cost";

  OS << ", plus " << C.ImmCost << " imm cost";

  if (C.SetupCost != 0)
    OS << ", plus " << C.SetupCost << " setup cost";
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace &&
          L->contains(PN->getIncomingBlock(i)))
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    Store->getOperand(0)->printAsOperand(OS, false);
  } else if (UserInst->getType()->isVoidTy())
    OS << UserInst->getOpcodeName();

    UserInst->printAsOperand(OS, false);

  OS << ", OperandValToReplace=";
  OperandValToReplace->printAsOperand(OS, false);

  for (const Loop *PIL : PostIncLoops) {
    OS << ", PostIncLoop=";
    PIL->getHeader()->printAsOperand(OS, false);

    OS << ", Offset=" << Offset;
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  return Uniquifier.count(Key);

float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
  for (const Formula &F : Formulae)
    if (F.referencesReg(Reg))
  return ((float)(Formulae.size() - FNum)) / Formulae.size();
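// Illustrative note (not in the original source): with 4 formulae of which
// exactly 1 references Reg, FNum == 1 and the probability that a randomly
// selected formula does not use Reg is (4 - 1) / 4 == 0.75.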
bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
  assert(F.isCanonical(L) && "Invalid canonical representation");

  if (!Formulae.empty() && RigidFormula)

  if (F.ScaledReg) Key.push_back(F.ScaledReg);

  if (!Uniquifier.insert(Key).second)

  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
         "Zero allocated in a scaled register!");
  for (const SCEV *BaseReg : F.BaseRegs)
    assert(!BaseReg->isZero() && "Zero allocated in a base register!");

  Formulae.push_back(F);

  Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
    Regs.insert(F.ScaledReg);
void LSRUse::DeleteFormula(Formula &F) {
  if (&F != &Formulae.back())
  Formulae.pop_back();

void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
  for (const Formula &F : Formulae) {
    if (F.ScaledReg) Regs.insert(F.ScaledReg);
    Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());

  for (const SCEV *S : OldRegs)
      RegUses.dropRegister(S, LUIdx);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "LSR Use: Kind=";
  case Basic:    OS << "Basic"; break;
  case Special:  OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
    OS << "Address of ";
    if (AccessTy.MemTy->isPointerTy())
      OS << *AccessTy.MemTy;
    OS << " in addrspace(" << AccessTy.AddrSpace << ')';

  OS << ", Offsets={";
  bool NeedComma = false;
    if (NeedComma) OS << ',';

  if (AllFixupsOutsideLoop)
    OS << ", all-fixups-outside-loop";

  if (WidestFixupType)
    OS << ", widest fixup type: " << *WidestFixupType;
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale,
                            HasBaseReg, Scale, AccessTy.AddrSpace, Fixup);

  case LSRUse::ICmpZero:
    if (Scale != 0 && HasBaseReg && BaseOffset != 0)
    if (Scale != 0 && Scale != -1)
    if (BaseOffset != 0) {
      BaseOffset = -(uint64_t)BaseOffset;
    return !BaseGV && Scale == 0 && BaseOffset == 0;

  case LSRUse::Special:
    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
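// Illustrative note (an assumption drawn from the checks above): for ICmpZero
// uses, a scale of -1 folds for free because "icmp eq (-1*S + B), 0" can be
// emitted as "icmp eq B, S"; any other non-zero scale would need an explicit
// multiply, so it is rejected.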
                          int64_t MinOffset, int64_t MaxOffset,
                          LSRUse::KindType Kind, MemAccessTy AccessTy,
                          bool HasBaseReg, int64_t Scale) {
  if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
  MinOffset = (uint64_t)BaseOffset + MinOffset;
  if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
  MaxOffset = (uint64_t)BaseOffset + MaxOffset;

                              HasBaseReg, Scale) &&

                      int64_t MinOffset, int64_t MaxOffset,
                      LSRUse::KindType Kind, MemAccessTy AccessTy,
                      const Formula &F, const Loop &L) {
  assert((F.isCanonical(L) || F.Scale != 0));
                              F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);

                 int64_t MaxOffset, LSRUse::KindType Kind,
                 int64_t BaseOffset, bool HasBaseReg, int64_t Scale) {
                               BaseOffset, HasBaseReg, Scale) ||
                                    BaseGV, BaseOffset, true, 0));

                 int64_t MaxOffset, LSRUse::KindType Kind,
                 MemAccessTy AccessTy, const Formula &F) {
  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
                    F.BaseOffset, F.HasBaseReg, F.Scale);
                             const LSRUse &LU, const Formula &F) {
  for (const LSRFixup &Fixup : LU.Fixups)
                              (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
                              F.Scale, Fixup.UserInst))

                              LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,

                                     const LSRUse &LU, const Formula &F,

    return F.Scale != 1;

        LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MinOffset, F.HasBaseReg,
        F.Scale, LU.AccessTy.AddrSpace);
        LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, F.HasBaseReg,
        F.Scale, LU.AccessTy.AddrSpace);

    assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
           "Legal addressing mode has an illegal cost!");
    return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
  case LSRUse::ICmpZero:
  case LSRUse::Special:

                             LSRUse::KindType Kind, MemAccessTy AccessTy,
  if (BaseOffset == 0 && !BaseGV)
    return true;

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  if (!HasBaseReg && Scale == 1) {

                          int64_t MaxOffset, LSRUse::KindType Kind,
                          MemAccessTy AccessTy, const SCEV *S,
  if (S->isZero())
    return true;

  if (!S->isZero())
    return false;

  if (BaseOffset == 0 && !BaseGV)
    return true;

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

                                BaseOffset, HasBaseReg, Scale);
  const SCEV *IncExpr;

      : UserInst(U), IVOperand(O), IncExpr(E) {}

  const SCEV *ExprBase = nullptr;

  IVChain() = default;
  IVChain(const IVInc &Head, const SCEV *Base)
      : Incs(1, Head), ExprBase(Base) {}

  const_iterator begin() const {
    return std::next(Incs.begin());
  const_iterator end() const {

  bool hasIncs() const { return Incs.size() >= 2; }

  bool isProfitableIncrement(const SCEV *OperExpr,
                             const SCEV *IncExpr,

  bool Changed = false;

  RegUseTracker RegUses;
  static const unsigned MaxChains = 8;

  void OptimizeShadowIV();
  void OptimizeLoopTermCond();

  void FinalizeChain(IVChain &Chain);
  void CollectChains();

  void CollectInterestingTypesAndFactors();
  void CollectFixupsAndInitialFormulae();

  bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
                          LSRUse::KindType Kind, MemAccessTy AccessTy);

  std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
                                    MemAccessTy AccessTy);

  void DeleteUse(LSRUse &LU, size_t LUIdx);

  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);

  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);
  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);

  void CollectLoopInvariantFixupsAndFormulae();
  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
                              unsigned Depth = 0);
  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                  const Formula &Base, unsigned Depth,
                                  size_t Idx, bool IsScaledReg = false);
  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base, size_t Idx,
                                   bool IsScaledReg = false);
  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base,
                                   size_t Idx, bool IsScaledReg = false);
  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateCrossUseConstantOffsets();
  void GenerateAllReuseFormulae();

  void FilterOutUndesirableDedicatedRegisters();

  size_t EstimateSearchSpaceComplexity() const;
  void NarrowSearchSpaceByDetectingSupersets();
  void NarrowSearchSpaceByCollapsingUnrolledCode();
  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  void NarrowSearchSpaceByDeletingCostlyFormulas();
  void NarrowSearchSpaceByPickingWinnerRegs();
  void NarrowSearchSpaceUsingHeuristics();
                    const Cost &CurCost,

  Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
  void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
  void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,

  bool getChanged() const { return Changed; }

  void print_factors_and_types(raw_ostream &OS) const;
void LSRInstance::OptimizeShadowIV() {
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

    Type *DestTy = nullptr;
    bool IsSigned = false;

    if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
      DestTy = UCast->getDestTy();
    } else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
      DestTy = SCast->getDestTy();
    if (!DestTy) continue;

    if (Mantissa == -1) continue;

    unsigned Entry, Latch;

    if (!Init) continue;

    if (!Incr) continue;
        && Incr->getOpcode() != Instruction::Sub)

    if (Incr->getOperand(0) == PH)
      C = dyn_cast<ConstantInt>(Incr->getOperand(1));
    else if (Incr->getOperand(1) == PH)
      C = dyn_cast<ConstantInt>(Incr->getOperand(0));

                               Instruction::FAdd : Instruction::FSub,
                             NewPH, CFP, "IV.S.next.", Incr);
    if (U.getUser() == Cond) {

  if (!Sel || !Sel->hasOneUse()) return Cond;

  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
  if (IterationCount != SE.getSCEV(Sel)) return Cond;

  if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
  } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
  } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {

         "Loop condition operand is an addrec in a different loop!");

  Value *NewRHS = nullptr;
    if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
      if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
        NewRHS = BO->getOperand(0);
    if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
      if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
        NewRHS = BO->getOperand(0);
  else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
    NewRHS = SU->getValue();
LSRInstance::OptimizeLoopTermCond() {
      return LatchBlock != BB;

  for (BasicBlock *ExitingBlock : ExitingBlocks) {

    if (!FindIVUserForCond(Cond, CondUse))

    Cond = OptimizeMax(Cond, CondUse);

    if (!DT.dominates(ExitingBlock, LatchBlock))

    if (LatchBlock != ExitingBlock)
        if (&*UI != CondUse &&

          const SCEV *A = IU.getStride(*CondUse, L);
          const SCEV *B = IU.getStride(*UI, L);
          if (!A || !B) continue;

                  dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
              goto decline_post_inc;
            goto decline_post_inc;
          if (isAddressUse(TTI, UI->getUser(), UI->getOperandValToReplace())) {
                TTI, UI->getUser(), UI->getOperandValToReplace());
                                      AccessTy.AddrSpace))
              goto decline_post_inc;
                                      AccessTy.AddrSpace))
              goto decline_post_inc;

    LLVM_DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "

      Cond = cast<ICmpInst>(Cond->clone());
      ExitingBlock->getInstList().insert(TermBr->getIterator(), Cond);

        IVIncInsertPos = Inst;
      else if (BB != IVIncInsertPos->getParent())
bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
                                     bool HasBaseReg, LSRUse::KindType Kind,
                                     MemAccessTy AccessTy) {
  int64_t NewMinOffset = LU.MinOffset;
  int64_t NewMaxOffset = LU.MaxOffset;
  MemAccessTy NewAccessTy = AccessTy;

  if (LU.Kind != Kind)

  if (AccessTy.MemTy != LU.AccessTy.MemTy) {
    NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
                                          AccessTy.AddrSpace);

  if (NewOffset < LU.MinOffset) {
                          LU.MaxOffset - NewOffset, HasBaseReg))
    NewMinOffset = NewOffset;
  } else if (NewOffset > LU.MaxOffset) {
                          NewOffset - LU.MinOffset, HasBaseReg))
    NewMaxOffset = NewOffset;

  LU.MinOffset = NewMinOffset;
  LU.MaxOffset = NewMaxOffset;
  LU.AccessTy = NewAccessTy;
std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
                                               LSRUse::KindType Kind,
                                               MemAccessTy AccessTy) {
  const SCEV *Copy = Expr;

  std::pair<UseMapTy::iterator, bool> P =
    size_t LUIdx = P.first->second;
    LSRUse &LU = Uses[LUIdx];
    if (reconcileNewOffset(LU, Offset, true, Kind, AccessTy))
      return std::make_pair(LUIdx, Offset);

  size_t LUIdx = Uses.size();
  P.first->second = LUIdx;
  Uses.push_back(LSRUse(Kind, AccessTy));
  LSRUse &LU = Uses[LUIdx];

  return std::make_pair(LUIdx, Offset);
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
  if (&LU != &Uses.back())

  RegUses.swapAndDropUse(LUIdx, Uses.size());
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                       const LSRUse &OrigLU) {
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    if (&LU != &OrigLU &&
        LU.Kind != LSRUse::ICmpZero &&
        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
        LU.WidestFixupType == OrigLU.WidestFixupType &&
        LU.HasFormulaWithSameRegs(OrigF)) {
      for (const Formula &F : LU.Formulae) {
        if (F.BaseRegs == OrigF.BaseRegs &&
            F.ScaledReg == OrigF.ScaledReg &&
            F.BaseGV == OrigF.BaseGV &&
            F.Scale == OrigF.Scale &&
            F.UnfoldedOffset == OrigF.UnfoldedOffset) {
          if (F.BaseOffset == 0)
void LSRInstance::CollectInterestingTypesAndFactors() {
    const SCEV *Expr = IU.getExpr(U);
        if (AR->getLoop() == L)
          Strides.insert(AR->getStepRecurrence(SE));
  } while (!Worklist.empty());

       I = Strides.begin(), E = Strides.end(); I != E; ++I)
         std::next(I); NewStrideIter != E; ++NewStrideIter) {
      const SCEV *OldStride = *I;
      const SCEV *NewStride = *NewStrideIter;

              dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
          if (Factor->getAPInt().getMinSignedBits() <= 64)
            Factors.insert(Factor->getAPInt().getSExtValue());
          if (Factor->getAPInt().getMinSignedBits() <= 64)
            Factors.insert(Factor->getAPInt().getSExtValue());

  if (Types.size() == 1)
  for (; OI != OE; ++OI) {
    if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
              dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
        if (AR->getLoop() == L)

  if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
    return Trunc->getOperand(0);

    return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
    return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
    return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());

    for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
      const SCEV *SubExpr = *I;

    return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
                                    const SCEV *IncExpr,
  if (!isa<SCEVConstant>(IncExpr)) {
    if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
  if (!Chain.hasIncs())

  if (!Users.empty()) {
    LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
               for (Instruction *Inst : Users) {
                 dbgs() << "  " << *Inst << "\n";
               });
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

  if (isa<PHINode>(Chain.tailUserInst())
      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {

  const SCEV *LastIncExpr = nullptr;
  unsigned NumConstIncrements = 0;
  unsigned NumVarIncrements = 0;
  unsigned NumReusedIncrements = 0;
  for (const IVInc &Inc : Chain) {
    if (Inc.IncExpr->isZero())

    if (isa<SCEVConstant>(Inc.IncExpr)) {
      ++NumConstIncrements;

    if (Inc.IncExpr == LastIncExpr)
      ++NumReusedIncrements;

    LastIncExpr = Inc.IncExpr;

  if (NumConstIncrements > 1)

  cost += NumVarIncrements;

  cost -= NumReusedIncrements;
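// Illustrative note (a sketch assuming the usual continue/else structure of
// this loop, which is elided above): a chain with increments {x, x, 4} counts
// NumVarIncrements=1, NumReusedIncrements=1, NumConstIncrements=1, so the
// variable-increment charge and the reuse credit cancel out (net 0).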
  LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
  unsigned ChainIdx = 0, NChains = IVChainVec.size();
  const SCEV *LastIncExpr = nullptr;
  for (; ChainIdx < NChains; ++ChainIdx) {
    IVChain &Chain = IVChainVec[ChainIdx];

    if (!StressIVChain && Chain.ExprBase != OperExprBase)

    if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))

    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
      LastIncExpr = IncExpr;

  if (ChainIdx == NChains) {
    if (isa<PHINode>(UserInst))
    if (NChains >= MaxChains && !StressIVChain) {
    LastIncExpr = OperExpr;

    if (!isa<SCEVAddRecExpr>(LastIncExpr))

    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
    ChainUsersVec.resize(NChains);
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
                      << ") IV=" << *LastIncExpr << "\n");
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
                      << ") IV+" << *LastIncExpr << "\n");
    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
  IVChain &Chain = IVChainVec[ChainIdx];

  if (!LastIncExpr->isZero()) {
    ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),

  IVChain::const_iterator IncIter = Chain.Incs.begin();
  IVChain::const_iterator IncEnd = Chain.Incs.end();
  for (; IncIter != IncEnd; ++IncIter) {
    if (IncIter->UserInst == OtherUse)
  if (IncIter != IncEnd)

      && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
      && IU.isIVUserOrOperand(OtherUse)) {
    NearUsers.insert(OtherUse);

  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
void LSRInstance::CollectChains() {
       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {

      if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))

      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
           ChainIdx < NChains; ++ChainIdx) {
        ChainUsersVec[ChainIdx].NearUsers.erase(&I);

      while (IVOpIter != IVOpEnd) {
        Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
        if (UniqueOperands.insert(IVOpInst).second)
          ChainInstruction(&I, IVOpInst, ChainUsersVec);
        IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);

      ChainInstruction(&PN, IncV, ChainUsersVec);

  unsigned ChainIdx = 0;
  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
       UsersIdx < NChains; ++UsersIdx) {
                           ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
    if (ChainIdx != UsersIdx)
      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
    FinalizeChain(IVChainVec[ChainIdx]);
  IVChainVec.resize(ChainIdx);
void LSRInstance::FinalizeChain(IVChain &Chain) {
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

  for (const IVInc &Inc : Chain) {
    auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
    assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
    IVIncSet.insert(UseI);
  if (!IncConst || !isAddressUse(TTI, UserInst, Operand))

  MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);

  const IVInc &Head = Chain.Incs[0];

  Value *IVSrc = nullptr;
  while (IVOpIter != IVOpEnd) {
    if (SE.getSCEV(*IVOpIter) == Head.IncExpr
        || SE.getSCEV(IVSrc) == Head.IncExpr) {
    IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
  if (IVOpIter == IVOpEnd) {
    LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");

  Type *IVTy = IVSrc->getType();

  const SCEV *LeftOverExpr = nullptr;
  for (const IVInc &Inc : Chain) {
    if (isa<PHINode>(InsertPt))
      InsertPt = L->getLoopLatch()->getTerminator();

    Value *IVOper = IVSrc;
    if (!Inc.IncExpr->isZero()) {
      LeftOverExpr = LeftOverExpr ?
        SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
    if (LeftOverExpr && !LeftOverExpr->isZero()) {
      IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
      assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
      LeftOverExpr = nullptr;

    Type *OperTy = Inc.IVOperand->getType();
    if (IVTy != OperTy) {
             "cannot extend a chained IV");
      IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
    Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);

  if (isa<PHINode>(Chain.tailUserInst())) {
    for (PHINode &Phi : L->getHeader()->phis()) {
               Phi.getIncomingValueForBlock(L->getLoopLatch()));
      Value *IVOper = IVSrc;
      if (IVTy != PostIncTy) {
        IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
        IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
      Phi.replaceUsesOfWith(PostIncV, IVOper);
void LSRInstance::CollectFixupsAndInitialFormulae() {
        find(UserInst->operands(), U.getOperandValToReplace());
    assert(UseI != UserInst->op_end() && "cannot find IV operand");
    if (IVIncSet.count(UseI)) {
      LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');

    MemAccessTy AccessTy;
    if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {
      AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace());

    const SCEV *S = IU.getExpr(U);

    if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst))
      if (CI->isEquality()) {
        Value *NV = CI->getOperand(1);
        if (NV == U.getOperandValToReplace()) {
          CI->setOperand(1, CI->getOperand(0));
          CI->setOperand(0, NV);
          NV = CI->getOperand(1);

        Kind = LSRUse::ICmpZero;

        for (size_t i = 0, e = Factors.size(); i != e; ++i)
          if (Factors[i] != -1)
            Factors.insert(-(uint64_t)Factors[i]);

    std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
    size_t LUIdx = P.first;
    int64_t Offset = P.second;
    LSRUse &LU = Uses[LUIdx];

    LSRFixup &LF = LU.getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = U.getOperandValToReplace();
    LF.PostIncLoops = TmpPostIncLoops;

    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);

    if (!LU.WidestFixupType ||
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LUIdx);
      CountRegisters(LU.Formulae.back(), LUIdx);
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
    LU.RigidFormula = true;

  F.initialMatch(S, L, SE);
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;

LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                       LSRUse &LU, size_t LUIdx) {
  F.BaseRegs.push_back(S);
  F.HasBaseReg = true;
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;

void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
    RegUses.countRegister(F.ScaledReg, LUIdx);
  for (const SCEV *BaseReg : F.BaseRegs)
    RegUses.countRegister(BaseReg, LUIdx);

bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
  assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
         "Formula is illegal");

  if (!LU.InsertFormula(F, *L))

  CountRegisters(F, LUIdx);
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  while (!Worklist.empty()) {
    const SCEV *S = Worklist.pop_back_val();

    if (!Visited.insert(S).second)

      Worklist.append(N->op_begin(), N->op_end());
    else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
      Worklist.push_back(D->getLHS());
      Worklist.push_back(D->getRHS());
    } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
      const Value *V = US->getValue();
      if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
      } else if (isa<UndefValue>(V))

      for (const Use &U : V->uses()) {
        const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
            cast<PHINode>(UserInst)->getIncomingBlock(

          const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
          if (!isa<SCEVUnknown>(UserS))
              SE.getUnknown(const_cast<Instruction *>(UserInst)));

        if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
          unsigned OtherIdx = !U.getOperandNo();
          Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));

        std::pair<size_t, int64_t> P = getUse(
        size_t LUIdx = P.first;
        int64_t Offset = P.second;
        LSRUse &LU = Uses[LUIdx];
        LSRFixup &LF = LU.getNewFixup();
        LF.UserInst = const_cast<Instruction *>(UserInst);
        LF.OperandValToReplace = U;

        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        if (!LU.WidestFixupType ||
          LU.WidestFixupType = LF.OperandValToReplace->getType();
        InsertSupplementalFormula(US, LU, LUIdx);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
                                   unsigned Depth = 0) {
    for (const SCEV *S : Add->operands()) {
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    if (AR->getStart()->isZero() || !AR->isAffine())

    if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
      Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
      Remainder = nullptr;
    if (Remainder != AR->getStart()) {
        Remainder = SE.getConstant(AR->getType(), 0);
      return SE.getAddRecExpr(Remainder,
                              AR->getStepRecurrence(SE),
  } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    if (Mul->getNumOperands() != 2)
            dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
      C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
      const SCEV *Remainder =
        Ops.push_back(SE.getMulExpr(C, Remainder));
                              LSRUse &LU, const SCEV *S, const Loop *L,
      !LU.AccessTy.getType()->isIntOrIntVectorTy())

  if (!isa<SCEVConstant>(LoopStep))
  if (LU.AccessTy.getType()->getScalarSizeInBits() !=

  if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                             const Formula &Base,
                                             unsigned Depth, size_t Idx,
  const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

  if (AddOps.size() == 1)

                                LU.AccessTy, *J, Base.getNumRegs() > 1))

    InnerAddOps.append(std::next(J),

    if (InnerAddOps.size() == 1 &&
                                  LU.AccessTy, InnerAddOps[0],
                                  Base.getNumRegs() > 1))

        F.ScaledReg = nullptr;
        F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
    } else if (IsScaledReg)
      F.ScaledReg = InnerSum;
      F.BaseRegs[Idx] = InnerSum;

      F.BaseRegs.push_back(*J);

    if (InsertFormula(LU, LUIdx, F))
      GenerateReassociations(LU, LUIdx, LU.Formulae.back(),

void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
                                         Formula Base, unsigned Depth) {
  assert(Base.isCanonical(*L) && "Input must be in the canonical form");

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);

  if (Base.Scale == 1)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseRegs.size() + (Base.Scale == 1) +
      (Base.UnfoldedOffset != 0) <= 1)

  Formula NewBase = Base;
  NewBase.BaseRegs.clear();
  Type *CombinedIntegerType = nullptr;
  for (const SCEV *BaseReg : Base.BaseRegs) {
      if (!CombinedIntegerType)
      NewBase.BaseRegs.push_back(BaseReg);

  if (Ops.size() == 0)

  auto GenerateFormula = [&](const SCEV *Sum) {
    Formula F = NewBase;
    F.BaseRegs.push_back(Sum);
    (void)InsertFormula(LU, LUIdx, F);

  if (Ops.size() > 1) {

  if (NewBase.UnfoldedOffset) {
    assert(CombinedIntegerType && "Missing a type for the unfolded offset");
    NewBase.UnfoldedOffset = 0;
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base, size_t Idx,
  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))

    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseGV) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
  if (Base.Scale == 1)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, -1,
void LSRInstance::GenerateConstantOffsetsImpl(
    LSRUse &LU, unsigned LUIdx, const Formula &Base,
  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
  for (int64_t Offset : Worklist) {
    F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
          F.ScaledReg = nullptr;
          F.deleteBaseReg(F.BaseRegs[Idx]);
      } else if (IsScaledReg)
        F.BaseRegs[Idx] = NewG;

      (void)InsertFormula(LU, LUIdx, F);

  if (G->isZero() || Imm == 0)
  F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
  if (LU.MaxOffset != LU.MinOffset)

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
  if (Base.Scale == 1)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, -1,
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
  if (LU.Kind != LSRUse::ICmpZero) return;

  Type *IntTy = Base.getType();

  if (LU.MinOffset != LU.MaxOffset) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
  for (const SCEV *BaseReg : Base.BaseRegs)
  assert(!Base.BaseGV && "ICmpZero use is not legal!");

  for (int64_t Factor : Factors) {
    if (Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1)
    int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
    if (NewBaseOffset / Factor != Base.BaseOffset)

    int64_t Offset = LU.MinOffset;
    if (Offset == std::numeric_limits<int64_t>::min() && Factor == -1)
    Offset = (uint64_t)Offset * Factor;
    if (Offset / Factor != LU.MinOffset)

    F.BaseOffset = NewBaseOffset;

    if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))

    F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;

    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
      F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
      if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])

      F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
      if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)

    if (F.UnfoldedOffset != 0) {
      if (F.UnfoldedOffset == std::numeric_limits<int64_t>::min() &&
      F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
      if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)

    (void)InsertFormula(LU, LUIdx, F);
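// Illustrative note (not in the original source): the multiply-then-divide
// pattern above is a portable signed-overflow check. E.g. with
// BaseOffset == INT64_MAX and Factor == 2, the wrapped product divided by 2
// no longer equals BaseOffset, so that candidate factor is skipped.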
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  Type *IntTy = Base.getType();

  if (Base.Scale != 0 && !Base.unscale())

  assert(Base.Scale == 0 && "unscale did not do its job!");

  for (int64_t Factor : Factors) {
    Base.Scale = Factor;
    Base.HasBaseReg = Base.BaseRegs.size() > 1;

    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
                     LU.AccessTy, Base) &&
          LU.AllFixupsOutsideLoop)
        LU.Kind = LSRUse::Special;

    if (LU.Kind == LSRUse::ICmpZero &&
        !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)

    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
      if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
          F.ScaledReg = Quotient;
          F.deleteBaseReg(F.BaseRegs[i]);

          if (F.Scale == 1 && (F.BaseRegs.empty() ||
                               (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))

          if (F.Scale == 1 && LU.AllFixupsOutsideLoop)

          (void)InsertFormula(LU, LUIdx, F);
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.BaseGV) return;

  Type *DstTy = Base.getType();

  for (Type *SrcTy : Types) {
      for (const SCEV *&BaseReg : F.BaseRegs)

      if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))

      (void)InsertFormula(LU, LUIdx, F);
  const SCEV *OrigReg;

  WorkItem(size_t LI, int64_t I, const SCEV *R)
      : LUIdx(LI), Imm(I), OrigReg(R) {}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
     << " , add offset " << Imm;
void LSRInstance::GenerateCrossUseConstantOffsets() {
  using ImmMapTy = std::map<int64_t, const SCEV *>;

  for (const SCEV *Use : RegUses) {
    auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
    Pair.first->second.insert(std::make_pair(Imm, Use));
    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);

  for (const SCEV *Reg : Sequence) {
    const ImmMapTy &Imms = Map.find(Reg)->second;

    if (Imms.size() == 1)

    LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
               for (const auto &Entry
                 << ' ' << Entry.first;

    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
      const SCEV *OrigReg = J->second;

      int64_t JImm = J->first;
      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);

      if (!isa<SCEVConstant>(OrigReg) &&
          UsedByIndicesMap[Reg].count() == 1) {

      ImmMapTy::const_iterator OtherImms[] = {
          Imms.begin(), std::prev(Imms.end()),
          Imms.lower_bound((Imms.begin()->first +
                            std::prev(Imms.end())->first) /
        ImmMapTy::const_iterator M = OtherImms[i];
        if (M == J || M == JE) continue;

        int64_t Imm = (uint64_t)JImm - M->first;
          if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
            WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));

  UsedByIndicesMap.clear();
  UniqueItems.clear();
  for (const WorkItem &WI : WorkItems) {
    size_t LUIdx = WI.LUIdx;
    LSRUse &LU = Uses[LUIdx];
    int64_t Imm = WI.Imm;
    const SCEV *OrigReg = WI.OrigReg;

    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
      Formula F = LU.Formulae[L];

      if (F.ScaledReg == OrigReg) {
        int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
        if (F.referencesReg(SE.getSCEV(
        NewF.BaseOffset = Offset;
        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);

        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
          if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
              (C->getAPInt().abs() * APInt(BitWidth, F.Scale))

        NewF.canonicalize(*this->L);
        (void)InsertFormula(LU, LUIdx, NewF);

        for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
          const SCEV *BaseReg = F.BaseRegs[N];
          if (BaseReg != OrigReg)

          NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
                          LU.Kind, LU.AccessTy, NewF)) {
            NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
          NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);

          for (const SCEV *NewReg : NewF.BaseRegs)
              if ((C->getAPInt() + NewF.BaseOffset)
                      countTrailingZeros<uint64_t>(NewF.BaseOffset))

          NewF.canonicalize(*this->L);
          (void)InsertFormula(LU, LUIdx, NewF);
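// Illustrative note (not in the original source): this transformation
// exploits registers that differ by a constant. If one use needs {X} and
// another needs {X+4}, the second can be rewritten as reg(X) with the 4
// folded into BaseOffset (or UnfoldedOffset), so both uses share one
// register.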
LSRInstance::GenerateAllReuseFormulae() {
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScales(LU, LUIdx, LU.Formulae[i]);

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);

  GenerateCrossUseConstantOffsets();

             "After generating reuse formulae:\n";
             print_uses(dbgs()));
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  bool ChangedFormulae = false;

  using BestFormulaeTy =

  BestFormulaeTy BestFormulae;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];

      CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, SE, DT, LU, &LoserRegs);
      if (CostF.isLoser()) {

        for (const SCEV *Reg : F.BaseRegs) {
          if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
            RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))

        std::pair<BestFormulaeTy::const_iterator, bool> P =
            BestFormulae.insert(std::make_pair(Key, FIdx));
          Formula &Best = LU.Formulae[P.first->second];

          CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU);
          if (CostF.isLess(CostBest, TTI))
                     "  in favor of formula ";
                     Best.print(dbgs()); dbgs() << '\n');

        ChangedFormulae = true;
        LU.DeleteFormula(F);

      LU.RecomputeRegs(LUIdx, RegUses);

  BestFormulae.clear();

             "After filtering out undesirable candidates:\n";
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  for (const LSRUse &LU : Uses) {
    size_t FSize = LU.Formulae.size();
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
  LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
                       "which use a superset of registers used by other "

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];
           I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
          NewF.BaseOffset += C->getValue()->getSExtValue();
          NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                              (I - F.BaseRegs.begin()));
          if (LU.HasFormulaWithSameRegs(NewF)) {
            LU.DeleteFormula(F);
        } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
          if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
              NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                  (I - F.BaseRegs.begin()));
              if (LU.HasFormulaWithSameRegs(NewF)) {
                LU.DeleteFormula(F);

      LU.RecomputeRegs(LUIdx, RegUses);
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
    dbgs() << "The search space is too complex.\n"
              "Narrowing the search space by assuming that uses separated "
              "by a constant offset will use the same registers.\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (const Formula &F : LU.Formulae) {
      if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))

      LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);

      if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, false,
                              LU.Kind, LU.AccessTy))

      LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;

      for (LSRFixup &Fixup : LU.Fixups) {
        Fixup.Offset += F.BaseOffset;
        LUThatHas->pushFixup(Fixup);
        LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');

      for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
        Formula &F = LUThatHas->Formulae[i];
        if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
                        LUThatHas->Kind, LUThatHas->AccessTy, F)) {
          LUThatHas->DeleteFormula(F);

      LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);

      DeleteUse(LU, LUIdx);
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters() {
  LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
                       "undesirable dedicated registers.\n");

  FilterOutUndesirableDedicatedRegisters();
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
    dbgs() << "The search space is too complex.\n"
              "Narrowing the search space by choosing the best Formula "
              "from the Formulae with the same Scale and ScaledReg.\n");

  BestFormulaeTy BestFormulae;

  bool ChangedFormulae = false;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    auto IsBetterThan = [&](Formula &FA, Formula &FB) {
      size_t FARegNum = 0;
      for (const SCEV *Reg : FA.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FARegNum += (NumUses - UsedByIndices.count() + 1);
      size_t FBRegNum = 0;
      for (const SCEV *Reg : FB.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FBRegNum += (NumUses - UsedByIndices.count() + 1);
      if (FARegNum != FBRegNum)
        return FARegNum < FBRegNum;

      Cost CostFA, CostFB;
      CostFA.RateFormula(TTI, FA, Regs, VisitedRegs, L, SE, DT, LU);
      CostFB.RateFormula(TTI, FB, Regs, VisitedRegs, L, SE, DT, LU);
      return CostFA.isLess(CostFB, TTI);
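// Illustrative note (not in the original source): a register shared by more
// uses weighs less here. With NumUses == 4, a base reg used by 3 uses adds
// 4 - 3 + 1 == 2 to a formula's weight, while a reg used by only 1 use adds
// 4 - 1 + 1 == 4, so the more widely shared formula wins the comparison.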
    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
      Formula &F = LU.Formulae[FIdx];

      auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
        Formula &Best = LU.Formulae[P.first->second];
        if (IsBetterThan(F, Best))
                   "  in favor of formula ";
                   Best.print(dbgs()); dbgs() << '\n');

        ChangedFormulae = true;
        LU.DeleteFormula(F);

      LU.RecomputeRegs(LUIdx, RegUses);

  BestFormulae.clear();

             "After filtering out undesirable candidates:\n";
/// If we are over the complexity limit, delete formulae whose expected
/// register count is high, keeping only the cheapest formula per use.
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;
  // Ok, we have too many formulae on our hands to conveniently handle.
  // Use a rough heuristic to thin out the list.

  // Set of registers which will certainly be used in the final solution and
  // can therefore be skipped in the calculations.
  SmallPtrSet<const SCEV *, 4> UniqRegs;
  LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

  // Map each register to its probability of not being selected.
  DenseMap<const SCEV *, float> RegNumMap;
  for (const SCEV *Reg : RegUses) {
    if (UniqRegs.count(Reg))
      continue;
    float PNotSel = 1;
    for (const LSRUse &LU : Uses) {
      if (!LU.Regs.count(Reg))
        continue;
      float P = LU.getNotSelectedProbability(Reg);
      if (P != 0.0)
        PNotSel *= P;
      else
        UniqRegs.insert(Reg);
    }
    RegNumMap.insert(std::make_pair(Reg, PNotSel));
  }

  LLVM_DEBUG(
      dbgs() << "Narrowing the search space by deleting costly formulas\n");

  // Delete formulae where the register count expectation is too high.
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    // If there is nothing to delete, continue.
    if (LU.Formulae.size() < 2)
      continue;
    size_t MinIdx = 0;
    float FMinRegNum = LU.Formulae[0].getNumRegs();
    float FMinARegNum = LU.Formulae[0].getNumRegs();
    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];
      float FRegNum = 0;
      float FARegNum = 0;
      for (const SCEV *BaseReg : F.BaseRegs) {
        if (UniqRegs.count(BaseReg))
          continue;
        FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
        if (isa<SCEVAddRecExpr>(BaseReg))
          FARegNum +=
              RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
      }
      if (const SCEV *ScaledReg = F.ScaledReg) {
        if (!UniqRegs.count(ScaledReg)) {
          FRegNum +=
              RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
          if (isa<SCEVAddRecExpr>(ScaledReg))
            FARegNum +=
                RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
        }
      }
      if (FMinRegNum > FRegNum ||
          (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
        FMinRegNum = FRegNum;
        FMinARegNum = FARegNum;
        MinIdx = i;
      }
    }
    LLVM_DEBUG(dbgs() << "  The formula "; LU.Formulae[MinIdx].print(dbgs());
               dbgs() << " with min reg num " << FMinRegNum << '\n');
    if (MinIdx != 0)
      std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
    while (LU.Formulae.size() != 1) {
      // Delete the rest of the formulae.
      LU.Formulae.pop_back();
    }
    LU.RecomputeRegs(LUIdx, RegUses);
    assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
    Formula &F = LU.Formulae[0];
    LLVM_DEBUG(dbgs() << "  Leaving only "; F.print(dbgs()); dbgs() << '\n');
    // Once a formula is chosen, its registers become unique.
    UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
    if (F.ScaledReg)
      UniqRegs.insert(F.ScaledReg);
  }
}
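// Illustrative sketch, not part of the pass: RegNumMap above caches, per
// register, the product of its "not selected" probabilities over all uses.
// Dividing by the current use's own factor, as the loop does, yields the
// probability that no *other* use selects the register, i.e. the chance this
// formula has to materialize the register itself. The helper below is a
// hypothetical standalone version of that arithmetic.
static float exampleExpectedRegCost(ArrayRef<float> PNotSelPerUse,
                                    unsigned ThisUse) {
  float Product = 1.0f;
  for (float P : PNotSelPerUse)
    Product *= P;                          // P(no use selects the register)
  return Product / PNotSelPerUse[ThisUse]; // Exclude this use's own factor.
}
// For per-use probabilities {0.5, 0.5} and ThisUse == 0, the expected cost is
// 0.25 / 0.5 = 0.5: the other use fails to pick the register half the time.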
/// Pick a register which seems likely to be profitable, and then in any use
/// which has any reference to that register, delete all formulae which do not
/// reference that register.
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
  // With all other options exhausted, loop until the system is simple enough
  // to handle.
  SmallPtrSet<const SCEV *, 4> Taken;
  while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    // We have too many formulae on our hands; use a rough heuristic to thin
    // out the list.
    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

    // Pick the register which is used by the most LSRUses, which is likely
    // to be a good reuse register candidate.
    const SCEV *Best = nullptr;
    unsigned BestNum = 0;
    for (const SCEV *Reg : RegUses) {
      if (Taken.count(Reg))
        continue;
      if (!Best) {
        Best = Reg;
        BestNum = RegUses.getUsedByIndices(Reg).count();
      } else {
        unsigned Count = RegUses.getUsedByIndices(Reg).count();
        if (Count > BestNum) {
          Best = Reg;
          BestNum = Count;
        }
      }
    }

    LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
                      << " will yield profitable reuse.\n");
    Taken.insert(Best);

    // In any use which references this register, delete all formulae which
    // don't reference it.
    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      if (!LU.Regs.count(Best)) continue;

      bool Any = false;
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (!F.referencesReg(Best)) {
          LU.DeleteFormula(F);
          --e;
          --i;
          Any = true;
          assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
        }
      }

      if (Any)
        LU.RecomputeRegs(LUIdx, RegUses);
    }
  }
}
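// Illustrative sketch, not part of the pass: picking the "winner" register is
// a plain argmax over how many uses reference each register. A hypothetical
// standalone equivalent over an array of use counts:
static int exampleWinnerIndex(ArrayRef<unsigned> UseCountPerReg) {
  int Best = -1;
  unsigned BestNum = 0;
  for (int i = 0, e = (int)UseCountPerReg.size(); i != e; ++i)
    if (Best == -1 || UseCountPerReg[i] > BestNum) {
      Best = i;
      BestNum = UseCountPerReg[i];
    }
  return Best; // -1 only if there are no registers at all.
}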
/// If there are an unreasonable number of formulae, prune the search space
/// down to a manageable size with a series of increasingly aggressive
/// heuristics.
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
  NarrowSearchSpaceByDetectingSupersets();
  NarrowSearchSpaceByCollapsingUnrolledCode();
  NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  if (FilterSameScaledReg)
    NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  if (LSRExpNarrow)
    NarrowSearchSpaceByDeletingCostlyFormulas();
  else
    NarrowSearchSpaceByPickingWinnerRegs();
}
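// Note: which of the tail heuristics above actually runs is gated by the
// cl::opts declared at the top of this file: -lsr-filter-same-scaled-reg
// (default true) enables the ScaledReg/Scale filter, and -lsr-exp-narrow
// (default false) selects expectation-based narrowing instead of winner
// register picking. A usage sketch from the command line, assuming an input
// file test.ll:
//
//   opt -loop-reduce -lsr-exp-narrow test.ll -S
//   opt -loop-reduce -lsr-filter-same-scaled-reg=false test.ll -S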
/// This is the recursive solver.
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                               Cost &SolutionCost,
                               SmallVectorImpl<const Formula *> &Workspace,
                               const Cost &CurCost,
                               const SmallPtrSet<const SCEV *, 16> &CurRegs,
                               DenseSet<const SCEV *> &VisitedRegs) const {
  const LSRUse &LU = Uses[Workspace.size()];

  // If this use references any register that's already a part of the
  // in-progress solution, consider it a requirement that a formula must
  // reference that register in order to be considered. This prunes out
  // unprofitable searching.
  SmallSetVector<const SCEV *, 4> ReqRegs;
  for (const SCEV *S : CurRegs)
    if (LU.Regs.count(S))
      ReqRegs.insert(S);

  SmallPtrSet<const SCEV *, 16> NewRegs;
  Cost NewCost;
  for (const Formula &F : LU.Formulae) {
    // Ignore formulae which may not be ideal in terms of register reuse of
    // ReqRegs. The formula should use all required registers before
    // introducing new ones.
    int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
    for (const SCEV *Reg : ReqRegs) {
      if ((F.ScaledReg && F.ScaledReg == Reg) ||
          is_contained(F.BaseRegs, Reg)) {
        --NumReqRegsToFind;
        if (NumReqRegsToFind == 0)
          break;
      }
    }
    if (NumReqRegsToFind != 0) {
      // If none of the formulae satisfied the required registers, we could
      // clear ReqRegs and try again. Currently, we simply give up.
      continue;
    }

    // Evaluate the cost of the current formula. If it's already worse than
    // the current best, prune the search at that point.
    NewCost = CurCost;
    NewRegs = CurRegs;
    NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU);
    if (NewCost.isLess(SolutionCost, TTI)) {
      Workspace.push_back(&F);
      if (Workspace.size() != Uses.size()) {
        SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
                     NewRegs, VisitedRegs);
        if (F.getNumRegs() == 1 && Workspace.size() == 1)
          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
      } else {
        LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
                   dbgs() << ".\n Regs:";
                   for (const SCEV *S : NewRegs) dbgs() << ' ' << *S;
                   dbgs() << '\n');
        SolutionCost = NewCost;
        Solution = Workspace;
      }
      Workspace.pop_back();
    }
  }
}
/// Choose one formula from each use. Return the results in the given Solution
/// vector.
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
  SmallVector<const Formula *, 8> Workspace;
  Cost SolutionCost;
  SolutionCost.Lose();
  Cost CurCost;
  SmallPtrSet<const SCEV *, 16> CurRegs;
  DenseSet<const SCEV *> VisitedRegs;
  Workspace.reserve(Uses.size());

  // SolveRecurse does all the work.
  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
               CurRegs, VisitedRegs);
  if (Solution.empty()) {
    LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
    return;
  }

  // Ok, we've now made all our decisions.
  LLVM_DEBUG(dbgs() << "\nThe chosen solution requires ";
             SolutionCost.print(dbgs()); dbgs() << ":\n";
             for (size_t i = 0, e = Uses.size(); i != e; ++i) {
               dbgs() << "  ";
               Uses[i].print(dbgs());
               dbgs() << "\n    ";
               Solution[i]->print(dbgs());
               dbgs() << '\n';
             });

  assert(Solution.size() == Uses.size() && "Malformed solution!");
}
/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as
/// far as possible while still being dominated by the input positions; a
/// canonical insert position encourages sharing.
BasicBlock::iterator
LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
                                 const SmallVectorImpl<Instruction *> &Inputs)
    const {
  Instruction *Tentative = &*IP;
  while (true) {
    bool AllDominate = true;
    Instruction *BetterPos = nullptr;
    // A catchswitch block cannot hold any other non-PHI instructions.
    if (isa<CatchSwitchInst>(Tentative))
      return IP;
    for (Instruction *Inst : Inputs) {
      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
        AllDominate = false;
        break;
      }
      // Prefer a position in the middle of the block, so that it can be
      // reused for other expansions.
      if (Tentative->getParent() == Inst->getParent() &&
          (!BetterPos || !DT.dominates(Inst, BetterPos)))
        BetterPos = &*std::next(BasicBlock::iterator(Inst));
    }
    if (!AllDominate)
      break;
    IP = BetterPos ? BetterPos->getIterator() : Tentative->getIterator();

    const Loop *IPLoop = LI.getLoopFor(IP->getParent());
    unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;

    BasicBlock *IDom;
    for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
      if (!Rung) return IP;
      Rung = Rung->getIDom();
      if (!Rung) return IP;
      IDom = Rung->getBlock();
      // Don't climb into a deeper loop.
      const Loop *IDomLoop = LI.getLoopFor(IDom);
      unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
      if (IDomDepth <= IPLoopDepth &&
          (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
        break;
    }
    Tentative = IDom->getTerminator();
  }
  return IP;
}
/// Determine an insert position which will be dominated by the operands and
/// which will dominate the result.
BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(
    BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU,
    SCEVExpander &Rewriter) const {
  // Collect the instructions which must dominate the expanded replacement.
  SmallVector<Instruction *, 4> Inputs;
  if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
    Inputs.push_back(I);
  if (LU.Kind == LSRUse::ICmpZero)
    if (Instruction *I =
            dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
      Inputs.push_back(I);
  if (LF.PostIncLoops.count(L)) {
    if (LF.isUseFullyOutsideLoop(L))
      Inputs.push_back(L->getLoopLatch()->getTerminator());
    else
      Inputs.push_back(IVIncInsertPos);
  }
  // Also be dominated by the increment positions of any loops for which the
  // use is in post-inc mode: the nearest common dominator of their exits.
  for (const Loop *PIL : LF.PostIncLoops) {
    if (PIL == L) continue;
    SmallVector<BasicBlock *, 4> ExitingBlocks;
    PIL->getExitingBlocks(ExitingBlocks);
    if (!ExitingBlocks.empty()) {
      BasicBlock *BB = ExitingBlocks[0];
      for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
        BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
      Inputs.push_back(BB->getTerminator());
    }
  }

  assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad() &&
         !isa<DbgInfoIntrinsic>(LowestIP) &&
         "Insertion point must be a normal instruction");

  // Climb the dominator tree as far as the inputs allow, then skip over PHI
  // nodes, EH pads, debug intrinsics, and the expander's own insertions.
  BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
  while (isa<PHINode>(IP)) ++IP;
  while (IP->isEHPad()) ++IP;
  while (isa<DbgInfoIntrinsic>(IP)) ++IP;
  while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
    ++IP;
  return IP;
}
/// Emit instructions for the leading candidate expression for this LSRUse
/// (this is called "expanding").
Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
                           const Formula &F, BasicBlock::iterator IP,
                           SCEVExpander &Rewriter,
                           SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  if (LU.RigidFormula)
    return LF.OperandValToReplace;

  // Determine an insert position dominated by the operands and dominating the
  // result, and inform the rewriter of any post-increment loops.
  IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
  Rewriter.setInsertPoint(&*IP);
  Rewriter.setPostInc(LF.PostIncLoops);

  // OpTy is the type the user needs; Ty is the type we initially expand to;
  // IntTy is the type used for integer arithmetic.
  Type *OpTy = LF.OperandValToReplace->getType();
  Type *Ty = F.getType();
  if (!Ty || SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
    Ty = OpTy;
  Type *IntTy = SE.getEffectiveSCEVType(Ty);

  // Build up a list of operands to add together to form the full base.
  SmallVector<const SCEV *, 8> Ops;
  for (const SCEV *Reg : F.BaseRegs) {
    assert(!Reg->isZero() && "Zero allocated in a base register!");
    // If we're expanding for a post-inc user, make the post-inc adjustment.
    Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE);
    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
  }

  // Expand the scaled register. An ICmpZero use "folds" a scale of -1 by
  // negating the other operand of the icmp, so keep that value aside.
  Value *ICmpScaledV = nullptr;
  if (F.Scale != 0) {
    const SCEV *ScaledS =
        denormalizeForPostIncUse(F.ScaledReg, LF.PostIncLoops, SE);
    if (LU.Kind == LSRUse::ICmpZero && F.Scale != 1) {
      assert(F.Scale == -1 &&
             "The only scale supported by ICmpZero uses is -1!");
      ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
    } else {
      // Expand the scaled register and an explicit scale, which is expected
      // to be matched as part of the address.
      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
      ScaledS =
          SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
      Ops.push_back(ScaledS);
    }
  }

  // Expand the immediate portion; an ICmpZero use can fold the immediate by
  // negating it into the icmp's other operand.
  int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
  if (Offset != 0) {
    if (LU.Kind == LSRUse::ICmpZero) {
      if (!ICmpScaledV)
        ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
      else {
        Ops.push_back(SE.getUnknown(ICmpScaledV));
        ICmpScaledV = ConstantInt::get(IntTy, Offset);
      }
    } else
      Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
  }

  // Expand the unfolded offset portion.
  int64_t UnfoldedOffset = F.UnfoldedOffset;
  if (UnfoldedOffset != 0)
    Ops.push_back(
        SE.getUnknown(ConstantInt::getSigned(IntTy, UnfoldedOffset)));

  // Emit instructions summing all the operands, then reset the rewriter.
  const SCEV *FullS =
      Ops.empty() ? SE.getConstant(IntTy, 0) : SE.getAddExpr(Ops);
  Value *FullV = Rewriter.expandCodeFor(FullS, Ty);
  Rewriter.clearPostInc();

  // An ICmpZero formula represents a comparison against zero. Now that one
  // operand has been expanded, update the icmp's other operand.
  if (LU.Kind == LSRUse::ICmpZero) {
    ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
    DeadInsts.emplace_back(CI->getOperand(1));
    assert(!F.BaseGV && "ICmp does not support folding a global value and "
                        "a scale at the same time!");
    if (F.Scale == -1) {
      if (ICmpScaledV->getType() != OpTy)
        ICmpScaledV = CastInst::Create(
            CastInst::getCastOpcode(ICmpScaledV, false, OpTy, false),
            ICmpScaledV, OpTy, "tmp", CI);
      CI->setOperand(1, ICmpScaledV);
    } else {
      // A scale of 1 means the scale was expanded as part of the base regs.
      assert((F.Scale == 0 || F.Scale == 1) &&
             "ICmp does not support folding a global value and "
             "a scale at the same time!");
      Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
                                           -(uint64_t)Offset);
      if (C->getType() != OpTy)
        C = ConstantExpr::getCast(
            CastInst::getCastOpcode(C, false, OpTy, false), C, OpTy);
      CI->setOperand(1, C);
    }
  }

  return FullV;
}
/// Helper for Rewrite. PHI nodes are special because the use of their operands
/// effectively happens in their predecessor blocks, so the expression may need
/// to be expanded in multiple places.
void LSRInstance::RewriteForPHI(
    PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
    SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  DenseMap<BasicBlock *, Value *> Inserted;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
    if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
      BasicBlock *BB = PN->getIncomingBlock(i);

      // If this is a critical edge, split the edge so that we do not insert
      // the code on all predecessor/successor paths. We do this unless this
      // is the canonical backedge for this loop.
      if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1) {
        BasicBlock *Parent = PN->getParent();
        Loop *PNLoop = LI.getLoopFor(Parent);
        if (!PNLoop || Parent != PNLoop->getHeader()) {
          // Split the critical edge. (Landing-pad parents take a separate
          // path through SplitLandingPadPredecessors.)
          if (BasicBlock *NewBB = SplitCriticalEdge(
                  BB, Parent,
                  CriticalEdgeSplittingOptions(&DT, &LI)
                      .setMergeIdenticalEdges()
                      .setDontDeleteUselessPHIs()))
            BB = NewBB;
        }
      }

      // Expand at most once per predecessor block, reusing the expansion.
      std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
          Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
      if (!Pair.second)
        PN->setIncomingValue(i, Pair.first->second);
      else {
        Value *FullV = Expand(LU, LF, F, BB->getTerminator()->getIterator(),
                              Rewriter, DeadInsts);

        // If this is reuse-by-noop-cast, insert the noop cast.
        Type *OpTy = LF.OperandValToReplace->getType();
        if (FullV->getType() != OpTy)
          FullV = CastInst::Create(
              CastInst::getCastOpcode(FullV, false, OpTy, false), FullV,
              LF.OperandValToReplace->getType(), "tmp", BB->getTerminator());

        PN->setIncomingValue(i, FullV);
        Pair.first->second = FullV;
      }
    }
}
/// Emit instructions for the leading candidate expression for this LSRUse
/// (this is called "expanding"), and update the UserInst to reference the
/// newly expanded value.
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
                          const Formula &F, SCEVExpander &Rewriter,
                          SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  // First, find an insertion point that dominates UserInst. For PHI nodes,
  // find the nearest block which dominates all the relevant uses.
  if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
    RewriteForPHI(PN, LU, LF, F, Rewriter, DeadInsts);
  } else {
    Value *FullV =
        Expand(LU, LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts);

    // If this is reuse-by-noop-cast, insert the noop cast.
    Type *OpTy = LF.OperandValToReplace->getType();
    if (FullV->getType() != OpTy) {
      Instruction *Cast =
          CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
                           FullV, OpTy, "tmp", LF.UserInst);
      FullV = Cast;
    }

    // Update the user. ICmpZero is handled specially here, because Expand
    // may have already updated one of the operands of the icmp.
    if (LU.Kind == LSRUse::ICmpZero)
      LF.UserInst->setOperand(0, FullV);
    else
      LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
  }

  DeadInsts.emplace_back(LF.OperandValToReplace);
}
/// Rewrite all the fixup locations with new values, following the chosen
/// solution.
void LSRInstance::ImplementSolution(
    const SmallVectorImpl<const Formula *> &Solution) {
  // Keep track of instructions we may have made dead, so that we can remove
  // them after we are done working.
  SmallVector<WeakTrackingVH, 16> DeadInsts;

  SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),
                        "lsr");
  Rewriter.disableCanonicalMode();
  Rewriter.setIVIncInsertPos(L, IVIncInsertPos);

  // Mark phi nodes that terminate chains so the expander tries to reuse them.
  for (const IVChain &Chain : IVChainVec)
    if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
      Rewriter.setChainedPhi(PN);

  // Expand the new value definitions and update the users.
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
    for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
      Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], Rewriter, DeadInsts);
      Changed = true;
    }

  for (const IVChain &Chain : IVChainVec) {
    GenerateIVChain(Chain, Rewriter, DeadInsts);
    Changed = true;
  }

  // Clean up after ourselves. This must be done before deleting any
  // instructions.
  Rewriter.clear();
  Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                         DominatorTree &DT, LoopInfo &LI,
                         const TargetTransformInfo &TTI)
    : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L) {
  // If there's no interesting work to be done, bail early.
  if (IU.empty())
    return;

  // If there's too much analysis to be done, bail early. We won't be able to
  // model the problem anyway.
  unsigned NumUsers = 0;
  for (const IVStrideUse &U : IU) {
    if (++NumUsers > MaxIVUsers) {
      (void)U;
      LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
                        << "\n");
      return;
    }
    // Bail out if we have a PHI on an EHPad that gets a value from a
    // CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is
    // no good place to stick any instructions.
    if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
      auto *FirstNonPHI = PN->getParent()->getFirstNonPHI();
      if (isa<FuncletPadInst>(FirstNonPHI) ||
          isa<CatchSwitchInst>(FirstNonPHI))
        for (BasicBlock *PredBB : PN->blocks())
          if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
            return;
    }
  }

#ifndef NDEBUG
  // All dominating loops must have preheaders, or SCEVExpander may not be
  // able to materialize an AddRecExpr whose Start is an outer AddRecExpr.
  for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
       Rung; Rung = Rung->getIDom()) {
    BasicBlock *BB = Rung->getBlock();
    const Loop *DomLoop = LI.getLoopFor(BB);
    if (DomLoop && DomLoop->getHeader() == BB)
      assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
  }
#endif // DEBUG

  LLVM_DEBUG(dbgs() << "\nLSR on loop ";
             L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
             dbgs() << ":\n");

  // First, perform some low-level loop optimizations.
  OptimizeShadowIV();
  OptimizeLoopTermCond();

  // If loop preparation eliminates all interesting IV users, bail.
  if (IU.empty())
    return;

  // Start collecting data and preparing for the solver.
  CollectChains();
  CollectInterestingTypesAndFactors();
  CollectFixupsAndInitialFormulae();
  CollectLoopInvariantFixupsAndFormulae();
  if (Uses.empty())
    return;

  LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
             print_uses(dbgs()));

  // Now use the reuse data to generate a bunch of interesting ways to
  // formulate the values needed for the uses.
  GenerateAllReuseFormulae();
  FilterOutUndesirableDedicatedRegisters();
  NarrowSearchSpaceUsingHeuristics();

  SmallVector<const Formula *, 8> Solution;
  Solve(Solution);

  // Release memory that is no longer needed.
  Factors.clear();
  Types.clear();
  RegUses.clear();

  if (Solution.empty())
    return;

#ifndef NDEBUG
  // Formulae should be legal.
  for (const LSRUse &LU : Uses)
    for (const Formula &F : LU.Formulae)
      assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                        F) &&
             "Illegal formula generated!");
#endif

  // Now that we've decided what we want, make it so.
  ImplementSolution(Solution);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
  if (Factors.empty() && Types.empty())
    return;

  OS << "LSR has identified the following interesting factors and types: ";
  bool First = true;

  for (int64_t Factor : Factors) {
    if (!First) OS << ", ";
    First = false;
    OS << '*' << Factor;
  }

  for (Type *Ty : Types) {
    if (!First) OS << ", ";
    First = false;
    OS << '(' << *Ty << ')';
  }
  OS << '\n';
}

void LSRInstance::print_fixups(raw_ostream &OS) const {
  OS << "LSR is examining the following fixup sites:\n";
  for (const LSRUse &LU : Uses)
    for (const LSRFixup &LF : LU.Fixups) {
      OS << "  ";
      LF.print(OS);
      OS << '\n';
    }
}

void LSRInstance::print_uses(raw_ostream &OS) const {
  OS << "LSR is examining the following uses:\n";
  for (const LSRUse &LU : Uses) {
    OS << "  ";
    LU.print(OS);
    OS << '\n';
    for (const Formula &F : LU.Formulae) {
      OS << "    ";
      F.print(OS);
      OS << '\n';
    }
  }
}

void LSRInstance::print(raw_ostream &OS) const {
  print_factors_and_types(OS);
  print_fixups(OS);
  print_uses(OS);
}

LLVM_DUMP_METHOD void LSRInstance::dump() const {
  print(errs());
  errs() << '\n';
}
#endif
namespace {

class LoopStrengthReduce : public LoopPass {
public:
  static char ID; // Pass identification, replacement for typeid

  LoopStrengthReduce();

private:
  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
};

} // end anonymous namespace

LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
  initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}

void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
  // We split critical edges, so we change the CFG. However, we do update
  // many analyses if they are around.
  AU.addRequiredID(LoopSimplifyID);
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addPreserved<LoopInfoWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addPreserved<ScalarEvolutionWrapperPass>();
  AU.addRequired<IVUsersWrapperPass>();
  AU.addPreserved<IVUsersWrapperPass>();
  AU.addRequired<TargetTransformInfoWrapperPass>();
}

static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                               DominatorTree &DT, LoopInfo &LI,
                               const TargetTransformInfo &TTI) {
  bool Changed = false;

  // Run the main LSR transformation.
  Changed |= LSRInstance(L, IU, SE, DT, LI, TTI).getChanged();

  // Remove any extra phis created by processing inner loops.
  if (EnablePhiElim && L->isLoopSimplifyForm()) {
    SmallVector<WeakTrackingVH, 16> DeadInsts;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    SCEVExpander Rewriter(SE, DL, "lsr");
    unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
    if (numFolded) {
      Changed = true;
      DeleteTriviallyDeadInstructions(DeadInsts);
      DeleteDeadPHIs(L->getHeader());
    }
  }
  return Changed;
}

bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
  if (skipLoop(L))
    return false;

  auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
      *L->getHeader()->getParent());
  return ReduceLoopStrength(L, IU, SE, DT, LI, TTI);
}

char LoopStrengthReduce::ID = 0;

INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
                      "Loop Strength Reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
                    "Loop Strength Reduction", false, false)

Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); }
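// Usage sketch (assumptions noted, not from this file): the pass is
// registered above under the command-line name "loop-reduce", so it can be
// exercised directly with opt, e.g. "opt -loop-reduce input.ll -S". From C++
// it can be scheduled through the legacy pass manager via the factory
// declared in llvm/Transforms/Scalar.h:
//
//   #include "llvm/IR/LegacyPassManager.h"
//   #include "llvm/Transforms/Scalar.h"
//
//   void runLSR(llvm::Module &M) {        // M: a hypothetical module
//     llvm::legacy::PassManager PM;
//     PM.add(llvm::createLoopStrengthReducePass());
//     PM.run(M);
//   }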