#include "llvm/Config/llvm-config.h"

using namespace llvm;

#define DEBUG_TYPE "sroa"

STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumDeleted, "Number of instructions deleted");
STATISTIC(NumVectorized, "Number of vectorized aggregates");
void SetNamePrefix(const Twine &P) { Prefix = P.str(); }

uint64_t BeginOffset = 0;
uint64_t EndOffset = 0;

Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
    : BeginOffset(BeginOffset), EndOffset(EndOffset),
      UseAndIsSplittable(U, IsSplittable) {}

uint64_t beginOffset() const { return BeginOffset; }
uint64_t endOffset() const { return EndOffset; }

bool isSplittable() const { return UseAndIsSplittable.getInt(); }
void makeUnsplittable() { UseAndIsSplittable.setInt(false); }

Use *getUse() const { return UseAndIsSplittable.getPointer(); }

bool isDead() const { return getUse() == nullptr; }
void kill() { UseAndIsSplittable.setPointer(nullptr); }

if (beginOffset() < RHS.beginOffset())
  return true;
if (beginOffset() > RHS.beginOffset())
  return false;
if (isSplittable() != RHS.isSplittable())
  return !isSplittable();
if (endOffset() > RHS.endOffset())
  return true;

friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
                                            uint64_t RHSOffset) {
  return LHS.beginOffset() < RHSOffset;
}
friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
                                            const Slice &RHS) {
  return LHSOffset < RHS.beginOffset();
}

return isSplittable() == RHS.isSplittable() &&
       beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();

template <> struct isPodLike<Slice> { static const bool value = true; };
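// Illustrative sketch (added for exposition, not from the original file): a
// minimal stand-in for the Slice ordering above, assuming the same rules --
// ascending begin offset, then unsplittable before splittable, then longer
// slices first -- so that after sorting, the slice that anchors a partition
// comes before the splittable slices it overlaps. All names under the
// sroa_sketch namespace are hypothetical.
#include <algorithm>
#include <cstdint>
#include <vector>

namespace sroa_sketch {
struct SliceKey {
  uint64_t Begin, End;
  bool Splittable;
  bool operator<(const SliceKey &RHS) const {
    if (Begin < RHS.Begin) return true;
    if (Begin > RHS.Begin) return false;
    if (Splittable != RHS.Splittable) return !Splittable; // unsplittable first
    return End > RHS.End;                                 // longer slices first
  }
};

inline void sortSlices(std::vector<SliceKey> &Slices) {
  std::sort(Slices.begin(), Slices.end());
}
} // namespace sroa_sketch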
int OldSize = Slices.size();
Slices.append(NewSlices.begin(), NewSlices.end());
auto SliceI = Slices.begin() + OldSize;
std::inplace_merge(Slices.begin(), SliceI, Slices.end());

class partition_iterator;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

template <typename DerivedT, typename RetT = void> class BuilderBase;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

uint64_t BeginOffset, EndOffset;

Partition(iterator SI) : SI(SI), SJ(SI) {}

assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
return EndOffset - BeginOffset;

bool empty() const { return SI == SJ; }

iterator end() const { return SJ; }
uint64_t MaxSplitSliceEndOffset = 0;

       "Cannot advance past the end of the slices!");

if (!P.SplitTails.empty()) {
  if (P.EndOffset >= MaxSplitSliceEndOffset) {
    P.SplitTails.clear();
    MaxSplitSliceEndOffset = 0;

                         return S->endOffset() <= P.EndOffset;

                         return S->endOffset() == MaxSplitSliceEndOffset;
           "Could not find the current max split slice offset!");
                         return S->endOffset() <= MaxSplitSliceEndOffset;
           "Max split slice end offset is not actually the max!");

assert(P.SplitTails.empty() && "Failed to clear the split slices!");

if (S.isSplittable() && S.endOffset() > P.EndOffset) {
  P.SplitTails.push_back(&S);
  MaxSplitSliceEndOffset = std::max(S.endOffset(), MaxSplitSliceEndOffset);

P.BeginOffset = P.EndOffset;
P.EndOffset = MaxSplitSliceEndOffset;

if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
    !P.SI->isSplittable()) {
  P.BeginOffset = P.EndOffset;
  P.EndOffset = P.SI->beginOffset();

P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
P.EndOffset = P.SI->endOffset();

if (!P.SI->isSplittable()) {
  assert(P.BeginOffset == P.SI->beginOffset());
  while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
    if (!P.SJ->isSplittable())
      P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

assert(P.SI->isSplittable() && "Forming a splittable partition!");
while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
       P.SJ->isSplittable()) {
  P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
  assert(!P.SJ->isSplittable());
  P.EndOffset = P.SJ->beginOffset();

       "End iterators don't match between compared partition iterators!");
if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
  assert(P.SJ == RHS.P.SJ &&
         "Same set of slices formed two different sized partitions!");
  assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
         "Same slice position with differently sized non-empty split "
         "slice tails!");

if (PHINode *PN = dyn_cast<PHINode>(&I)) {
  return PN->hasConstantValue();
const uint64_t AllocSize;

      AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), AS(AS) {}

if (VisitedDeadInsts.insert(&I).second)
  AS.DeadUsers.push_back(&I);

               bool IsSplittable = false) {
  if (Size == 0 || Offset.uge(AllocSize)) {
    LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @"
                      << Offset
                      << " which has zero size or starts outside of the "
                      << AllocSize << " byte alloca:\n"
                      << "    alloca: " << AS.AI << "\n"
                      << "       use: " << I << "\n");
    return markAsDead(I);

uint64_t EndOffset = BeginOffset + Size;

assert(AllocSize >= BeginOffset);
if (Size > AllocSize - BeginOffset) {
  LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
                    << Offset << " to remain within the " << AllocSize
                    << " byte alloca:\n"
                    << "    alloca: " << AS.AI << "\n"
                    << "       use: " << I << "\n");
  EndOffset = AllocSize;

AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));

  return markAsDead(BC);
return Base::visitBitCastInst(BC);

  return markAsDead(GEPI);

if (StructType *STy = GTI.getStructTypeOrNull()) {

if (GEPOffset.ugt(AllocSize))
  return markAsDead(GEPI);
return Base::visitGetElementPtrInst(GEPI);

insertUse(I, Offset, Size, IsSplittable);
782 "All simple FCA loads should have been pre-split");
785 return PI.setAborted(&LI);
795 return PI.setEscapedAndAborted(&SI);
797 return PI.setAborted(&SI);
809 if (Size > AllocSize || Offset.
ugt(AllocSize - Size)) {
810 LLVM_DEBUG(
dbgs() <<
"WARNING: Ignoring " << Size <<
" byte store @" 811 << Offset <<
" which extends past the end of the " 812 << AllocSize <<
" byte alloca:\n" 813 <<
" alloca: " << AS.AI <<
"\n" 814 <<
" use: " << SI <<
"\n");
815 return markAsDead(SI);
819 "All simple FCA stores should have been pre-split");
826 if ((Length && Length->getValue() == 0) ||
827 (IsOffsetKnown && Offset.
uge(AllocSize)))
829 return markAsDead(II);
832 return PI.setAborted(&II);
841 if (Length && Length->
getValue() == 0)
843 return markAsDead(II);
847 if (VisitedDeadInsts.
count(&II))
851 return PI.setAborted(&II);
858 if (Offset.
uge(AllocSize)) {
860 MemTransferSliceMap.
find(&II);
861 if (MTPI != MemTransferSliceMap.
end())
862 AS.Slices[MTPI->second].kill();
863 return markAsDead(II);
867 uint64_t Size = Length ? Length->
getLimitedValue() : AllocSize - RawOffset;
874 return markAsDead(II);
876 return insertUse(II, Offset, Size,
false);
883 std::tie(MTPI, Inserted) =
884 MemTransferSliceMap.
insert(std::make_pair(&II, AS.Slices.
size()));
885 unsigned PrevIdx = MTPI->second;
887 Slice &PrevP = AS.Slices[PrevIdx];
891 if (!II.
isVolatile() && PrevP.beginOffset() == RawOffset) {
893 return markAsDead(II);
898 PrevP.makeUnsplittable();
902 insertUse(II, Offset, Size, Inserted && Length);
905 assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
906 "Map index doesn't point back to a slice with this user.");
914 return PI.setAborted(&II);
920 insertUse(II, Offset, Size,
true);
924 Base::visitIntrinsicInst(II);
Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));

if (LoadInst *LI = dyn_cast<LoadInst>(I)) {

if (StoreInst *SI = dyn_cast<StoreInst>(I)) {

  if (!GEP->hasAllZeroIndices())
} else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
           !isa<SelectInst>(I)) {

if (Visited.insert(cast<Instruction>(U)).second)
  Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
} while (!Uses.empty());

assert(isa<PHINode>(I) || isa<SelectInst>(I));

  return markAsDead(I);

AS.DeadOperands.push_back(U);

  return PI.setAborted(&I);

uint64_t &Size = PHIOrSelectSizes[&I];

if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
  return PI.setAborted(UnsafeI);

if (Offset.uge(AllocSize)) {
  AS.DeadOperands.push_back(U);

insertUse(I, Offset, Size);

void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }

void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }

void visitInstruction(Instruction &I) { PI.setAborted(&I); }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

      PointerEscapingInstr(nullptr) {

assert(PointerEscapingInstr && "Did not track a bad instruction");

std::mt19937 MT(static_cast<unsigned>( /* ... */ ));
std::shuffle(Slices.begin(), Slices.end(), MT);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

printSlice(OS, I, Indent);
printUse(OS, I, Indent);

OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
   << " slice #" << (I - begin())
   << (I->isSplittable() ? " (splittable)" : "");

OS << Indent << "  used by: " << *I->getUse()->getUser() << "\n";

if (PointerEscapingInstr) {
  OS << "Can't analyze slices for alloca: " << AI << "\n"
     << "  A pointer to this alloca escaped by:\n"
     << "  " << *PointerEscapingInstr << "\n";

OS << "Slices of alloca: " << AI << "\n";
for (const_iterator I = begin(), E = end(); I != E; ++I)
#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

                              uint64_t EndOffset) {
  bool TyIsCommon = true;

  Use *U = I->getUse();
  if (isa<IntrinsicInst>(*U->getUser()))

  if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)

  Type *UserTy = nullptr;
  if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
    UserTy = LI->getType();
  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
    UserTy = SI->getValueOperand()->getType();
  }

  if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
    if (UserITy->getBitWidth() % 8 != 0 ||
        UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))

    if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
  }

  if (!UserTy || (Ty && Ty != UserTy))

  return TyIsCommon ? Ty : ITy;
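// Illustrative sketch (added for exposition, not from the original file): the
// common-type selection above, restated over a plain list of user types. If
// every user of the full range agrees on one type, that type wins; otherwise
// the widest byte-sized integer user that still fits in RangeBytes is the
// fallback. The sroa_sketch names and the integer type IDs are hypothetical.
#include <cstdint>
#include <vector>

namespace sroa_sketch {
struct UserTy {
  bool IsInteger;
  unsigned BitWidth; // meaningful only when IsInteger
  int Id;            // stand-in for the type pointer; -1 means "none"
};

inline int commonTypeId(const std::vector<UserTy> &Users, uint64_t RangeBytes) {
  int Ty = -1, ITy = -1;
  unsigned ITyBits = 0;
  bool TyIsCommon = true;
  for (const UserTy &U : Users) {
    if (U.IsInteger && U.BitWidth % 8 == 0 && U.BitWidth / 8 <= RangeBytes &&
        U.BitWidth > ITyBits) {
      ITy = U.Id;            // track the widest viable integer fallback
      ITyBits = U.BitWidth;
    }
    if (Ty == -1)
      Ty = U.Id;
    else if (Ty != U.Id)
      TyIsCommon = false;    // users disagree on a single common type
  }
  return TyIsCommon ? Ty : ITy;
}
} // namespace sroa_sketch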
unsigned MaxAlign = 0;
bool HaveLoad = false;

if (BBI->mayWriteToMemory())

Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
IRBuilderTy PHIBuilder(&PN);

                      PN.getName() + ".sroa.speculated");

IRBuilderTy PredBuilder(TI);

    InVal, (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
++NumLoadsSpeculated;

InjectedLoads[Pred] = Load;

IRBuilderTy IRB(&SI);

IRB.SetInsertPoint(LI);
    IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");
    IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");
NumLoadsSpeculated += 2;

                               LI->getName() + ".sroa.speculated");

if (Indices.empty())

if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())

return IRB.CreateInBoundsGEP(nullptr, BasePtr, Indices,
                             NamePrefix + "sroa_idx");

return buildGEP(IRB, BasePtr, Indices, NamePrefix);
unsigned NumLayers = 0;
Type *ElementTy = Ty;

  if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) {
    ElementTy = ArrayTy->getElementType();
    Indices.push_back(IRB.getIntN(OffsetSize, 0));
  } else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) {
    ElementTy = VectorTy->getElementType();
  } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
    if (STy->element_begin() == STy->element_end())
    ElementTy = *STy->element_begin();
  }
} while (ElementTy != TargetTy);

if (ElementTy != TargetTy)
  Indices.erase(Indices.end() - NumLayers, Indices.end());

return buildGEP(IRB, BasePtr, Indices, NamePrefix);
if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
  if (ElementSizeInBits % 8 != 0) {

  APInt NumSkippedElements = Offset.sdiv(ElementSize);
  if (NumSkippedElements.ugt(VecTy->getNumElements()))

  Offset -= NumSkippedElements * ElementSize;
  Indices.push_back(IRB.getInt(NumSkippedElements));
                                  Offset, TargetTy, Indices, NamePrefix);

if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
  Type *ElementTy = ArrTy->getElementType();

  APInt NumSkippedElements = Offset.sdiv(ElementSize);
  if (NumSkippedElements.ugt(ArrTy->getNumElements()))

  Offset -= NumSkippedElements * ElementSize;
  Indices.push_back(IRB.getInt(NumSkippedElements));
                                  Indices, NamePrefix);

                                Indices, NamePrefix);

if (ElementSize == 0)

APInt NumSkippedElements = Offset.sdiv(ElementSize);

Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
                                Indices, NamePrefix);
Value *OffsetPtr = nullptr;
Value *OffsetBasePtr;

Value *Int8Ptr = nullptr;

  if (!GEP->accumulateConstantOffset(DL, GEPOffset))

  Offset += GEPOffset;
  Ptr = GEP->getPointerOperand();
  if (!Visited.insert(Ptr).second)

                                Indices, NamePrefix)) {

  if (OffsetPtr && OffsetPtr != OffsetBasePtr)
    if (Instruction *I = dyn_cast<Instruction>(OffsetPtr)) {
      assert(I->use_empty() && "Built a GEP with uses some how!");
      I->eraseFromParent();

  OffsetBasePtr = Ptr;

    Ptr = cast<Operator>(Ptr)->getOperand(0);
  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
    if (GA->isInterposable())

    Ptr = GA->getAliasee();

} while (Visited.insert(Ptr).second);

Int8Ptr = IRB.CreateBitCast(
                            NamePrefix + "sroa_raw_cast");

OffsetPtr = Int8PtrOffset == 0
                : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr,
                                        IRB.getInt(Int8PtrOffset),
                                        NamePrefix + "sroa_raw_idx");

Ptr = IRB.CreateBitCast(Ptr, PointerTy, NamePrefix + "sroa_cast");

if (auto *LI = dyn_cast<LoadInst>(I)) {
  Alignment = LI->getAlignment();
} else if (auto *SI = dyn_cast<StoreInst>(I)) {
  Alignment = SI->getAlignment();
  Ty = SI->getValueOperand()->getType();

return MinAlign(Alignment, Offset);
if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
         "We can't have the same bitwidth for different int types");

return cast<PointerType>(NewTy)->getPointerAddressSpace() ==
       cast<PointerType>(OldTy)->getPointerAddressSpace();

assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
       "Integer types must be the exact same to convert.");

    return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),

    return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),

  return IRB.CreateIntToPtr(V, NewTy);

    return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),

    return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),

  return IRB.CreatePtrToInt(V, NewTy);

return IRB.CreateBitCast(V, NewTy);
                                            uint64_t ElementSize,

uint64_t BeginOffset =
uint64_t BeginIndex = BeginOffset / ElementSize;
if (BeginIndex * ElementSize != BeginOffset ||

uint64_t EndOffset =
uint64_t EndIndex = EndOffset / ElementSize;
if (EndIndex * ElementSize != EndOffset || EndIndex > Ty->getNumElements())

assert(EndIndex > BeginIndex && "Empty vector!");
uint64_t NumElements = EndIndex - BeginIndex;
Type *SliceTy = (NumElements == 1)

Use *U = S.getUse();

  if (MI->isVolatile())
  if (!S.isSplittable())
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
  if (!II->isLifetimeStartOrEnd())
} else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
} else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
  if (LI->isVolatile())
  Type *LTy = LI->getType();
} else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
  if (SI->isVolatile())
  Type *STy = SI->getValueOperand()->getType();
Type *CommonEltTy = nullptr;
bool HaveCommonEltTy = true;
auto CheckCandidateType = [&](Type *Ty) {
  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
      CommonEltTy = VTy->getElementType();
    else if (CommonEltTy != VTy->getElementType())
      HaveCommonEltTy = false;

for (const Slice &S : P)
  if (S.beginOffset() == P.beginOffset() &&
      S.endOffset() == P.endOffset()) {
    if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
      CheckCandidateType(LI->getType());
    else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
      CheckCandidateType(SI->getValueOperand()->getType());
  }

if (CandidateTys.empty())

if (!HaveCommonEltTy) {
                     CandidateTys.end());
  if (CandidateTys.empty())

         "Cannot have vector types of different sizes!");
         "All non-integer types eliminated!");
  assert(LHSTy->getElementType()->isIntegerTy() &&
         "All non-integer types eliminated!");

      std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes),
      CandidateTys.end());

  assert(VTy->getElementType() == CommonEltTy &&
         "Unaccounted for element type!");
  assert(VTy == CandidateTys[0] &&
         "Different vector types with the same element type!");
  CandidateTys.resize(1);

auto CheckVectorTypeForPromotion = [&](VectorType *VTy) {
  if (ElementSize % 8)
         "vector size not a multiple of element size?");

  for (const Slice &S : P)

  for (const Slice *S : P.splitSliceTails())

if (CheckVectorTypeForPromotion(VTy))
                                            uint64_t AllocBeginOffset,
                                            bool &WholeAllocaOp) {

uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
uint64_t RelEnd = S.endOffset() - AllocBeginOffset;

Use *U = S.getUse();

if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
  if (LI->isVolatile())

  if (S.beginOffset() < AllocBeginOffset)

  if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
    WholeAllocaOp = true;
  if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
  } else if (RelBegin != 0 || RelEnd != Size ||

} else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
  Type *ValueTy = SI->getValueOperand()->getType();
  if (SI->isVolatile())

  if (S.beginOffset() < AllocBeginOffset)

  if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
    WholeAllocaOp = true;
  if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
  } else if (RelBegin != 0 || RelEnd != Size ||

} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
  if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
  if (!S.isSplittable())
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
  if (!II->isLifetimeStartOrEnd())

bool WholeAllocaOp =

for (const Slice &S : P)

for (const Slice *S : P.splitSliceTails())

return WholeAllocaOp;
                             const Twine &Name) {
         "Element extends past full value");
uint64_t ShAmt = 8 * Offset;

  V = IRB.CreateLShr(V, ShAmt, Name + ".shift");

         "Cannot extract to a larger integer!");

  V = IRB.CreateTrunc(V, Ty, Name + ".trunc");

       "Cannot insert a larger integer!");

  V = IRB.CreateZExt(V, IntTy, Name + ".ext");

       "Element store outside of alloca store");
uint64_t ShAmt = 8 * Offset;

  V = IRB.CreateShl(V, ShAmt, Name + ".shift");

if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
  Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
  V = IRB.CreateOr(Old, V, Name + ".insert");
                           unsigned EndIndex, const Twine &Name) {
unsigned NumElements = EndIndex - BeginIndex;
assert(NumElements <= VecTy->getNumElements() && "Too many elements!");

if (NumElements == 1) {
  V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),

for (unsigned i = BeginIndex; i != EndIndex; ++i)

                          unsigned BeginIndex, const Twine &Name) {
assert(VecTy && "Can only insert a vector into a vector");

  V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),

       "Too many elements!");

  if (i >= BeginIndex && i < EndIndex)
    Mask.push_back(IRB.getInt32(i - BeginIndex));

  Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
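// Illustrative sketch (added for exposition, not from the original file): how
// the two masks built above work. Expanding the narrow vector places its
// element (i - BeginIndex) at lane i of the wide vector for lanes inside
// [BeginIndex, EndIndex) and leaves other lanes undefined (-1 here); a boolean
// select mask then blends the expanded value into the old alloca value only on
// that range. Names under sroa_sketch are hypothetical.
#include <vector>

namespace sroa_sketch {
inline std::vector<int> expandMask(unsigned NumElts, unsigned BeginIndex,
                                   unsigned EndIndex) {
  std::vector<int> Mask;
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(i >= BeginIndex && i < EndIndex ? int(i - BeginIndex) : -1);
  return Mask;
}

inline std::vector<bool> blendMask(unsigned NumElts, unsigned BeginIndex,
                                   unsigned EndIndex) {
  std::vector<bool> Mask;
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(i >= BeginIndex && i < EndIndex); // keep new value here
  return Mask;
}
} // namespace sroa_sketch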
const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;

uint64_t ElementSize;

uint64_t BeginOffset = 0;
uint64_t EndOffset = 0;

uint64_t NewBeginOffset, NewEndOffset;

bool IsSplittable = false;
bool IsSplit = false;
Use *OldUse = nullptr;

                    uint64_t NewAllocaBeginOffset,
                    uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
    : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
      NewAllocaBeginOffset(NewAllocaBeginOffset),
      NewAllocaEndOffset(NewAllocaEndOffset),
      NewAllocaTy(NewAI.getAllocatedType()),
      IntTy(IsIntegerPromotable
                DL.getTypeSizeInBits(NewAI.getAllocatedType()))
      VecTy(PromotableVecTy),
      ElementTy(VecTy ? VecTy->getElementType() : nullptr),
      ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
      PHIUsers(PHIUsers), SelectUsers(SelectUsers),
         "Only multiple-of-8 sized vector elements are viable");
  assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));

bool CanSROA = true;
BeginOffset = I->beginOffset();
EndOffset = I->endOffset();
IsSplittable = I->isSplittable();
    BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));

assert(BeginOffset < NewAllocaEndOffset);
assert(EndOffset > NewAllocaBeginOffset);
NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);

SliceSize = NewEndOffset - NewBeginOffset;

OldUse = I->getUse();
OldPtr = cast<Instruction>(OldUse->get());

Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
IRB.SetInsertPoint(OldUserI);
IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());

CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
assert(IsSplit || BeginOffset == NewBeginOffset);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

size_t LastSROAPrefix = OldName.rfind(".sroa.");
  OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
    OldName = OldName.substr(IndexEnd + 1);
      OldName = OldName.substr(OffsetEnd + 1);
OldName = OldName.substr(0, OldName.find(".sroa_"));

                       Twine(OldName) + ".");

unsigned getSliceAlign(Type *Ty = nullptr) {
      MinAlign(NewAIAlign, NewBeginOffset - NewAllocaBeginOffset);

unsigned getIndex(uint64_t Offset) {
  assert(VecTy && "Can only call getIndex when rewriting a vector");
  uint64_t RelOffset = Offset - NewAllocaBeginOffset;
  assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
  assert(Index * ElementSize == RelOffset);

void deleteIfTriviallyDead(Value *V) {
    Pass.DeadInsts.insert(I);

Value *rewriteVectorizedLoadInst() {
  unsigned BeginIndex = getIndex(NewBeginOffset);
  unsigned EndIndex = getIndex(NewEndOffset);
  assert(EndIndex > BeginIndex && "Empty vector!");

assert(IntTy && "We cannot insert an integer to the alloca");

assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {

       "Can only handle an extract for an overly wide load");
  V = IRB.CreateZExt(V, LI.getType());

bool IsPtrAdjusted = false;
  V = rewriteVectorizedLoadInst();
  V = rewriteIntegerLoad(LI);
} else if (NewBeginOffset == NewAllocaBeginOffset &&
           NewEndOffset == NewAllocaEndOffset &&

  if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
    if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
      if (AITy->getBitWidth() < TITy->getBitWidth()) {
        V = IRB.CreateZExt(V, TITy, "load.ext");
          V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),

  LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
                                          getSliceAlign(TargetTy),

  IsPtrAdjusted = true;
2524 "Only integer type loads and stores are split");
2526 "Split load isn't smaller than original load");
2529 "Non-byte-multiple bit width");
2536 Value *Placeholder =
2538 V =
insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
2547 Pass.DeadInsts.insert(&LI);
2548 deleteIfTriviallyDead(OldOp);
2556 unsigned BeginIndex = getIndex(NewBeginOffset);
2557 unsigned EndIndex = getIndex(NewEndOffset);
2558 assert(EndIndex > BeginIndex &&
"Empty vector!");
2559 unsigned NumElements = EndIndex - BeginIndex;
2560 assert(NumElements <= VecTy->getNumElements() &&
"Too many elements!");
2561 Type *SliceTy = (NumElements == 1)
2574 Pass.DeadInsts.insert(&SI);
2581 assert(IntTy &&
"We cannot extract an integer from the alloca");
2585 IRB.CreateAlignedLoad(&NewAI, NewAI.
getAlignment(),
"oldload");
2587 assert(BeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
2588 uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
2597 Pass.DeadInsts.insert(&SI);
2616 Pass.PostPromotionWorklist.insert(AI);
2621 "Only integer type loads and stores are split");
2624 "Non-byte-multiple bit width");
2626 V =
extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
2631 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
2633 return rewriteIntegerStore(V, SI, AATags);
2637 if (NewBeginOffset == NewAllocaBeginOffset &&
2638 NewEndOffset == NewAllocaEndOffset &&
2645 if (
auto *VITy = dyn_cast<IntegerType>(V->
getType()))
2646 if (
auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2647 if (VITy->getBitWidth() > AITy->getBitWidth()) {
2649 V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
2651 V = IRB.CreateTrunc(V, AITy,
"load.trunc");
2655 NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.
getAlignment(),
2660 NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->
getType()),
2669 Pass.DeadInsts.insert(&SI);
2670 deleteIfTriviallyDead(OldOp);
assert(Size > 0 && "Expected a positive number of bytes.");

    IRB.CreateZExt(V, SplatIntTy, "zext"),

Value *getVectorSplat(Value *V, unsigned NumElements) {
  V = IRB.CreateVectorSplat(NumElements, V, "vsplat");

assert(NewBeginOffset == BeginOffset);

  deleteIfTriviallyDead(OldPtr);

Pass.DeadInsts.insert(&II);

if (!VecTy && !IntTy &&
    (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||

assert(ElementTy == ScalarTy);

unsigned BeginIndex = getIndex(NewBeginOffset);
unsigned EndIndex = getIndex(NewEndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
unsigned NumElements = EndIndex - BeginIndex;
assert(NumElements <= VecTy->getNumElements() && "Too many elements!");

if (NumElements > 1)
  Splat = getVectorSplat(Splat, NumElements);

    IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");

uint64_t Size = NewEndOffset - NewBeginOffset;

if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
              EndOffset != NewAllocaBeginOffset)) {
      IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
         "Wrong type for an alloca wide integer!");

  assert(NewBeginOffset == NewAllocaBeginOffset);
  assert(NewEndOffset == NewAllocaEndOffset);

  if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
    V = getVectorSplat(V, AllocaVecTy->getNumElements());

unsigned SliceAlign = getSliceAlign();

if (!IsSplittable) {
  Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

  deleteIfTriviallyDead(OldPtr);

    (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||

if (EmitMemCpy && &OldAI == &NewAI) {
  assert(NewBeginOffset == BeginOffset);

  if (NewEndOffset != EndOffset)
                          NewEndOffset - NewBeginOffset));
Pass.DeadInsts.insert(&II);

assert(AI != &OldAI && AI != &NewAI &&
       "Splittable transfers cannot reach the same alloca on both ends.");
Pass.Worklist.insert(AI);

APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
unsigned OtherAlign =
OtherAlign = MinAlign(OtherAlign ? OtherAlign : 1,

OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,

Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

Value *DestPtr, *SrcPtr;
unsigned DestAlign, SrcAlign;
  DestAlign = SliceAlign;
  SrcAlign = OtherAlign;
  DestAlign = OtherAlign;
  SrcAlign = SliceAlign;

CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,

bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
                     NewEndOffset == NewAllocaEndOffset;
uint64_t Size = NewEndOffset - NewBeginOffset;
unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
unsigned NumElements = EndIndex - BeginIndex;

if (VecTy && !IsWholeAlloca) {
  if (NumElements == 1)
} else if (IntTy && !IsWholeAlloca) {

unsigned SrcAlign = OtherAlign;
Value *DstPtr = &NewAI;
unsigned DstAlign = SliceAlign;

if (VecTy && !IsWholeAlloca && !IsDest) {
  Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
} else if (IntTy && !IsWholeAlloca && !IsDest) {
  Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

if (VecTy && !IsWholeAlloca && IsDest) {
      IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
} else if (IntTy && !IsWholeAlloca && IsDest) {
      IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

    IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));

Pass.DeadInsts.insert(&II);
if (NewBeginOffset != NewAllocaBeginOffset ||
    NewEndOffset != NewAllocaEndOffset)

                          NewEndOffset - NewBeginOffset);

Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
  New = IRB.CreateLifetimeStart(Ptr, Size);
  New = IRB.CreateLifetimeEnd(Ptr, Size);

if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
  LI->setAlignment(std::min(LoadAlign, getSliceAlign()));

if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
  SI->setAlignment(std::min(StoreAlign, getSliceAlign()));

assert(isa<BitCastInst>(I) || isa<PHINode>(I) ||
       isa<SelectInst>(I) || isa<GetElementPtrInst>(I));

  if (Visited.insert(cast<Instruction>(U)).second)
} while (!Uses.empty());

bool visitPHINode(PHINode &PN) {
  assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
  assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");

  IRBuilderTy PtrBuilder(IRB);
  if (isa<PHINode>(OldPtr))
  PtrBuilder.SetInsertPoint(OldPtr);
  PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc());

  Value *NewPtr = getNewAllocaSlicePtr(PtrBuilder, OldPtr->getType());

  deleteIfTriviallyDead(OldPtr);

  fixLoadStoreAlign(PN);

         "Pointer isn't an operand!");
  assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
  assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");

  Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

  deleteIfTriviallyDead(OldPtr);

  fixLoadStoreAlign(SI);
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
  friend class InstVisitor<AggLoadStoreRewriter, bool>;

  AggLoadStoreRewriter(const DataLayout &DL) : DL(DL) {}

  bool Changed = false;
  while (!Queue.empty()) {
    U = Queue.pop_back_val();
    Changed |= visit(cast<Instruction>(U->getUser()));

      Queue.push_back(&U);

  bool visitInstruction(Instruction &I) { return false; }

  template <typename Derived> class OpSplitter {
        : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr),
          BaseTy(BaseTy), BaseAlign(BaseAlign), DL(DL) {}

      return static_cast<Derived *>(this)->emitFunc(

    if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
      unsigned OldSize = Indices.size();
      for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
        assert(Indices.size() == OldSize && "Did not return to the old size");
        GEPIndices.push_back(IRB.getInt32(Idx));
        emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));

    if (StructType *STy = dyn_cast<StructType>(Ty)) {
      unsigned OldSize = Indices.size();
      for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
        assert(Indices.size() == OldSize && "Did not return to the old size");
        GEPIndices.push_back(IRB.getInt32(Idx));
        emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));

  struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
        : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
                                     DL), AATags(AATags) {}
        IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
    LoadInst *Load = IRB.CreateAlignedLoad(GEP, Align, Name + ".load");
    Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");

    LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags,

  struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
        : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,

      Value *ExtractValue =
          IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
      Value *InBoundsGEP =
          IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
      IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Align);

    StoreOpSplitter Splitter(&SI, *U, V->getType(), AATags,

  bool visitPHINode(PHINode &PN) {

if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
  InnerTy = ArrTy->getElementType();
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
  InnerTy = STy->getElementType(Index);
Type *ElementTy = SeqTy->getElementType();
uint64_t NumSkippedElements = Offset / ElementSize;
if (NumSkippedElements >= SeqTy->getNumElements())
Offset -= NumSkippedElements * ElementSize;

if (Offset > 0 || Size < ElementSize) {
  if ((Offset + Size) > ElementSize)

if (Size == ElementSize)
assert(Size > ElementSize);
uint64_t NumElements = Size / ElementSize;
if (NumElements * ElementSize != Size)

uint64_t EndOffset = Offset + Size;

if (Offset >= ElementSize)

if (Offset > 0 || Size < ElementSize) {
  if ((Offset + Size) > ElementSize)

if (Size == ElementSize)

if (EndOffset < SL->getSizeInBytes()) {
  if (Index == EndIndex)

assert(Index < EndIndex);

if (Size != SubSL->getSizeInBytes())
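// Illustrative sketch (added for exposition, not from the original file): the
// whole-element case of the array arithmetic above, restated as a small
// helper. It returns how many whole elements [Offset, Offset + Size) covers,
// or 0 when the range is not element-aligned (the original instead recurses
// into a single element in that case). Names under sroa_sketch are
// hypothetical.
#include <cstdint>

namespace sroa_sketch {
inline uint64_t wholeElementCount(uint64_t Offset, uint64_t Size,
                                  uint64_t ElementSize, uint64_t NumElements) {
  if (ElementSize == 0)
    return 0;
  uint64_t NumSkipped = Offset / ElementSize;   // leading elements to skip
  if (NumSkipped >= NumElements)
    return 0;                                   // starts past the array
  Offset -= NumSkipped * ElementSize;           // offset within first element
  if (Offset != 0)
    return 0;                                   // must start on an element
  if (Size % ElementSize != 0)
    return 0;                                   // must cover whole elements
  uint64_t Count = Size / ElementSize;
  return NumSkipped + Count <= NumElements ? Count : 0;
}
} // namespace sroa_sketch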
struct SplitOffsets {
  std::vector<uint64_t> Splits;

LLVM_DEBUG(dbgs() << "  Searching for candidate loads and stores\n");

for (Slice &S : P) {
  Instruction *I = cast<Instruction>(S.getUse()->getUser());
  if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
    if (auto *LI = dyn_cast<LoadInst>(I))
      UnsplittableLoads.insert(LI);
    else if (auto *SI = dyn_cast<StoreInst>(I))
      if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
        UnsplittableLoads.insert(LI);

  assert(P.endOffset() > S.beginOffset() &&
         "Empty or backwards partition!");

  if (auto *LI = dyn_cast<LoadInst>(I)) {
    assert(!LI->isVolatile() && "Cannot split volatile loads!");

    auto IsLoadSimplyStored = [](LoadInst *LI) {
        if (!SI || !SI->isSimple())

    if (!IsLoadSimplyStored(LI)) {
      UnsplittableLoads.insert(LI);
  } else if (auto *SI = dyn_cast<StoreInst>(I)) {
    if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
    if (!StoredLoad || !StoredLoad->isSimple())
    assert(!SI->isVolatile() && "Cannot split volatile stores!");

  auto &Offsets = SplitOffsetsMap[I];
         "Should not have splits the first time we see an instruction!");
  Offsets.Splits.push_back(P.endOffset() - S.beginOffset());

for (Slice *S : P.splitSliceTails()) {
  auto SplitOffsetsMapI =
      SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
  if (SplitOffsetsMapI == SplitOffsetsMap.end())
  auto &Offsets = SplitOffsetsMapI->second;

         "Cannot have an empty set of splits on the second partition!");
             P.beginOffset() - Offsets.S->beginOffset() &&
         "Previous split does not end where this one begins!");

  if (S->endOffset() > P.endOffset())
    Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());

    [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
      if (UnsplittableLoads.count(LI))

      auto LoadOffsetsI = SplitOffsetsMap.find(LI);
      if (LoadOffsetsI == SplitOffsetsMap.end())
      auto &LoadOffsets = LoadOffsetsI->second;

      auto &StoreOffsets = SplitOffsetsMap[SI];

      if (LoadOffsets.Splits == StoreOffsets.Splits)

                 << "    Mismatched splits for load and store:\n"
                 << "      " << *LI << "\n"
                 << "      " << *SI << "\n");

      UnsplittableLoads.insert(LI);

      return UnsplittableLoads.count(LI);

    [&UnsplittableLoads](LoadInst *LI) {
      return UnsplittableLoads.count(LI);
IRBuilderTy IRB(&AI);

std::vector<LoadInst *> SplitLoads;

assert(LoadSize > 0 && "Cannot have a zero-sized integer load!");

auto &Offsets = SplitOffsetsMap[LI];
       "Slice size should always match load size exactly!");
uint64_t BaseOffset = Offsets.S->beginOffset();
assert(BaseOffset + LoadSize > BaseOffset &&
       "Cannot represent alloca access size using 64-bit integers!");

IRB.SetInsertPoint(LI);

uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();

  auto *PartPtrTy = PartTy->getPointerTo(AS);
  LoadInst *PLoad = IRB.CreateAlignedLoad(
                           PartPtrTy, BasePtr->getName() + "."),

  SplitLoads.push_back(PLoad);

      Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
                    << ", " << NewSlices.back().endOffset()
                    << "): " << *PLoad << "\n");

  PartOffset = Offsets.Splits[Idx];
  PartSize = (Idx < Size ? Offsets.Splits[Idx] : LoadSize) - PartOffset;
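// Illustrative sketch (added for exposition, not from the original file): how
// the split offsets recorded per instruction translate into (offset, size)
// parts, mirroring the loop above. For a 16-byte load with splits {4, 8} this
// yields (0,4), (4,4), (8,8). Names under sroa_sketch are hypothetical.
#include <cstdint>
#include <utility>
#include <vector>

namespace sroa_sketch {
inline std::vector<std::pair<uint64_t, uint64_t>>
partsFromSplits(uint64_t TotalSize, const std::vector<uint64_t> &Splits) {
  std::vector<std::pair<uint64_t, uint64_t>> Parts;
  uint64_t PartOffset = 0;
  for (std::size_t Idx = 0, Size = Splits.size(); Idx != Size + 1; ++Idx) {
    uint64_t PartEnd = Idx < Size ? Splits[Idx] : TotalSize;
    Parts.push_back({PartOffset, PartEnd - PartOffset}); // one split-off part
    PartOffset = PartEnd;
  }
  return Parts;
}
} // namespace sroa_sketch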
bool DeferredStores = false;
  if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
    DeferredStores = true;

  IRB.SetInsertPoint(SI);

  LLVM_DEBUG(dbgs() << "    Splitting store of load: " << *SI << "\n");

  for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
    uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];

    StoreInst *PStore = IRB.CreateAlignedStore(
                             PartPtrTy, StoreBasePtr->getName() + "."),

    LLVM_DEBUG(dbgs() << "      +" << PartOffset << ":" << *PStore << "\n");

  if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
    ResplitPromotableAllocas.insert(OtherAI);
    Worklist.insert(OtherAI);
  } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
    Worklist.insert(OtherAI);

  DeadInsts.insert(SI);

  SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));

  DeadInsts.insert(LI);

assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
       "Slice size should always match load size exactly!");
uint64_t BaseOffset = Offsets.S->beginOffset();
assert(BaseOffset + StoreSize > BaseOffset &&
       "Cannot represent alloca access size using 64-bit integers!");

auto SplitLoadsMapI = SplitLoadsMap.find(LI);
std::vector<LoadInst *> *SplitLoads = nullptr;
if (SplitLoadsMapI != SplitLoadsMap.end()) {
  SplitLoads = &SplitLoadsMapI->second;
         "Too few split loads for the number of splits in the store!");

uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();

    PLoad = (*SplitLoads)[Idx];
    IRB.SetInsertPoint(LI);
    PLoad = IRB.CreateAlignedLoad(
                             LoadPartPtrTy, LoadBasePtr->getName() + "."),

  IRB.SetInsertPoint(SI);
  StoreInst *PStore = IRB.CreateAlignedStore(
                           StorePartPtrTy, StoreBasePtr->getName() + "."),

      Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
                    << ", " << NewSlices.back().endOffset()
                    << "): " << *PStore << "\n");

  PartOffset = Offsets.Splits[Idx];
  PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;

if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
  assert(OtherAI != &AI && "We can't re-split our own alloca!");
  ResplitPromotableAllocas.insert(OtherAI);
  Worklist.insert(OtherAI);
} else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
  assert(OtherAI != &AI && "We can't re-split our own alloca!");
  Worklist.insert(OtherAI);

DeadInsts.insert(LI);
DeadInsts.insert(SI);

for (auto I = AS.begin(), E = AS.end(); I != E; ++I)

PromotableAllocas.erase(
    PromotableAllocas.end());
Type *SliceTy = nullptr;
  SliceTy = CommonUseTy;
    SliceTy = TypePartitionTy;
if ((!SliceTy || (SliceTy->isArrayTy() &&

                  << ") to: " << *NewAI << "\n");

unsigned PPWOldSize = PostPromotionWorklist.size();
unsigned NumUses = 0;

                             P.endOffset(), IsIntegerPromotable, VecTy,
                             PHIUsers, SelectUsers);
bool Promotable = true;

for (Slice &S : P) {

NumAllocaPartitionUses += NumUses;
MaxUsesPerAllocaPartition.updateMax(NumUses);

  SelectUsers.clear();

  SelectUsers.clear();

if (PHIUsers.empty() && SelectUsers.empty()) {
  PromotableAllocas.push_back(NewAI);

  for (PHINode *PHIUser : PHIUsers)
    SpeculatablePHIs.insert(PHIUser);
    SpeculatableSelects.insert(SelectUser);
  Worklist.insert(NewAI);

while (PostPromotionWorklist.size() > PPWOldSize)
  PostPromotionWorklist.pop_back();

Worklist.insert(NewAI);
unsigned NumPartitions = 0;
bool Changed = false;

Changed |= presplitLoadsAndStores(AI, AS);

bool IsSorted = true;

const uint64_t MaxBitVectorSize = 1024;
if (AllocaSize <= MaxBitVectorSize) {
    for (unsigned O = S.beginOffset() + 1;
         O < S.endOffset() && O < AllocaSize; O++)
      SplittableOffset.reset(O);

  for (Slice &S : AS) {
    if (!S.isSplittable())
    if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
        (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
    if (isa<LoadInst>(S.getUse()->getUser()) ||
        isa<StoreInst>(S.getUse()->getUser())) {
      S.makeUnsplittable();

  for (Slice &S : AS) {
    if (!S.isSplittable())
    if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
    if (isa<LoadInst>(S.getUse()->getUser()) ||
        isa<StoreInst>(S.getUse()->getUser())) {
      S.makeUnsplittable();

  if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
    uint64_t SizeOfByte = 8;
    uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);

NumAllocaPartitions += NumPartitions;
MaxPartitionsPerAlloca.updateMax(NumPartitions);

if (!DbgDeclares.empty()) {
  auto VarSize = Var->getSizeInBits();
  for (auto Fragment : Fragments) {
    auto *FragmentExpr = Expr;
    if (Fragment.Size < AllocaSize || Expr->isFragment()) {
      auto ExprFragment = Expr->getFragmentInfo();
      uint64_t Offset = ExprFragment ? ExprFragment->OffsetInBits : 0;
      uint64_t Start = Offset + Fragment.Offset;
      uint64_t Size = Fragment.Size;
          ExprFragment->OffsetInBits + ExprFragment->SizeInBits;
        if (Start >= AbsEnd)
        Size = std::min(Size, AbsEnd - Start);

      if (auto OrigFragment = FragmentExpr->getFragmentInfo()) {
        assert(Start >= OrigFragment->OffsetInBits &&
               "new fragment is outside of original fragment");
        Start -= OrigFragment->OffsetInBits;

      if (Size > *VarSize)
      if (Size == 0 || Start + Size > *VarSize)

    if (!VarSize || *VarSize != Size) {

      OldDII->eraseFromParent();

    DIB.insertDeclare(Fragment.Alloca, Var, FragmentExpr,
void SROA::clobberUse(Use &U) {
  if (Instruction *OldI = dyn_cast<Instruction>(OldV))
      DeadInsts.insert(OldI);

++NumAllocasAnalyzed;

bool Changed = false;

AggLoadStoreRewriter AggRewriter(DL);
Changed |= AggRewriter.rewrite(AI);

  for (Use &DeadOp : DeadUser->operands())

  DeadInsts.insert(DeadUser);

    clobberUse(*DeadOp);

Changed |= splitAlloca(AI, AS);

while (!SpeculatablePHIs.empty())

while (!SpeculatableSelects.empty())

bool SROA::deleteDeadInstructions(
  bool Changed = false;
  while (!DeadInsts.empty()) {
    LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");

    if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
      DeletedAllocas.insert(AI);
        OldDII->eraseFromParent();

      if (Instruction *U = dyn_cast<Instruction>(Operand)) {
          DeadInsts.insert(U);

bool SROA::promoteAllocas(Function &F) {
  if (PromotableAllocas.empty())

  NumPromoted += PromotableAllocas.size();

  PromotableAllocas.clear();

    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      Worklist.insert(AI);

bool Changed = false;

  while (!Worklist.empty()) {
    Changed |= runOnAlloca(*Worklist.pop_back_val());
    Changed |= deleteDeadInstructions(DeletedAllocas);

    if (!DeletedAllocas.empty()) {
      auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
      Worklist.remove_if(IsInSet);
      PostPromotionWorklist.remove_if(IsInSet);
          PromotableAllocas.end());
      DeletedAllocas.clear();
  }

  Changed |= promoteAllocas(F);

  Worklist = PostPromotionWorklist;
  PostPromotionWorklist.clear();
} while (!Worklist.empty());

if (skipFunction(F))

auto PA = Impl.runImpl(
    F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
    getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));

                    "Scalar Replacement Of Aggregates", false, false)
Legacy wrapper pass to provide the GlobalsAAResult object.
static Value * getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, APInt Offset, Type *PointerTy, Twine NamePrefix)
Compute an adjusted pointer from Ptr by Offset bytes where the resulting pointer has PointerTy...
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static VectorType * isVectorPromotionViable(Partition &P, const DataLayout &DL)
Test whether the given alloca partitioning and range of slices can be promoted to a vector...
static Value * getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, Type *Ty, APInt &Offset, Type *TargetTy, SmallVectorImpl< Value *> &Indices, Twine NamePrefix)
Recursively compute indices for a natural GEP.
Value * getValueOperand()
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
A parsed version of the target data layout string in and methods for querying it. ...
const_iterator end(StringRef path)
Get end iterator over path.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
uint64_t getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
An iterator over partitions of the alloca's slices.
void setInt(IntType IntVal)
static bool runImpl(Function &F, TargetLibraryInfo &TLI, DominatorTree &DT)
This is the entry point for all transforms.
iterator_range< use_iterator > uses()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
uint64_t getZExtValue() const
Get zero extended value.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
Base class for instruction visitors.
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
const Value * stripInBoundsOffsets() const
Strip off pointer casts and inbounds GEPs.
uint64_t beginOffset() const
The start offset of this partition.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This class represents lattice values for constants.
PointerTy getPointer() const
Type * getElementType(unsigned N) const
This is the interface for a simple mod/ref and alias analysis over globals.
bool isSized(SmallPtrSetImpl< Type *> *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
LLVM_NODISCARD size_t rfind(char C, size_t From=npos) const
Search for the last character C in the string.
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
void push_back(const T &Elt)
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
void erase(iterator Start, iterator Stop)
Erase a range of slices.
bool isTriviallyEmpty() const
Check if this twine is trivially empty; a false return value does not necessarily mean the twine is e...
This class provides information about the result of a visit.
This class represents a function call, abstracting a target machine's calling convention.
static Value * extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name)
Representation of the alloca slices.
An immutable pass that tracks lazily created AssumptionCache objects.
unsigned getSourceAlignment() const
Instruction * getAbortingInst() const
Get the instruction causing the visit to abort.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this store instruction.
gep_type_iterator gep_type_end(const User *GEP)
const Value * getTrueValue() const
void insert(ArrayRef< Slice > NewSlices)
Insert new slices for this alloca.
A cache of @llvm.assume calls within a function.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
LLVMContext & getContext() const
All values hold a context through their type.
void deleteValue()
Delete a pointer to a generic Value.
void setSource(Value *Ptr)
void setDest(Value *Ptr)
Set the specified arguments of the instruction.
This class wraps the llvm.memset intrinsic.
const Use & getOperandUse(unsigned i) const
Scalar Replacement Of Aggregates
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
STATISTIC(NumFunctions, "Total number of functions")
Analysis pass which computes a DominatorTree.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
An instruction for reading from memory.
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
bool isVectorTy() const
True if this is an instance of VectorType.
This defines the Use class.
void reserve(size_type N)
Value * getLength() const
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
bool isEscaped() const
Test whether a pointer to the allocation escapes our analysis.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
bool isSafeToLoadUnconditionally(Value *V, unsigned Align, const DataLayout &DL, Instruction *ScanFrom=nullptr, const DominatorTree *DT=nullptr)
Return true if we know that executing a load from this value cannot trap.
unsigned getElementContainingOffset(uint64_t Offset) const
Given a valid byte offset into the structure, returns the structure index that contains it...
unsigned getBitWidth() const
Return the number of bits in the APInt.
static unsigned getAdjustedAlignment(Instruction *I, uint64_t Offset, const DataLayout &DL)
Compute the adjusted alignment for a load or store from an offset.
Builder for the alloca slices.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
iterator begin()
Instruction iterator methods.
Value * getArgOperand(unsigned i) const
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, VectorType *Ty, uint64_t ElementSize, const DataLayout &DL)
Test whether the given slice use can be promoted to a vector.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
AnalysisUsage & addRequired()
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
#define INITIALIZE_PASS_DEPENDENCY(depName)
void setPointer(PointerTy PtrVal)
bool isVolatile() const
Return true if this is a load from a volatile memory location.
amdgpu Simplify well known AMD library false Value Value const Twine & Name
This class represents the LLVM 'select' instruction.
Type * getPointerElementType() const
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
unsigned getAlignment() const
Return the alignment of the memory that is being allocated by the instruction.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
PointerType * getType() const
Overload to return most specific pointer type.
Class to represent struct types.
A Use represents the edge between a Value definition and its users.
static Type * getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, uint64_t Size)
Try to find a partition of the aggregate type passed in for a given offset and size.
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
ArrayRef< Slice * > splitSliceTails() const
Get the sequence of split slice tails.
bool isAborted() const
Did we abort the visit early?
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
bool isIntegerTy() const
True if this is an instance of IntegerType.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
static Value * convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, Type *NewTy)
Generic routine to convert an SSA value to a value of a different type.
A partition of the slices.
unsigned getDestAlignment() const
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
uint64_t getNumElements() const
static StructType * get(LLVMContext &Context, ArrayRef< Type *> Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
This file implements a class to represent arbitrary precision integral constant values and operations...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass...
This file provides a collection of visitors which walk the (instruction) uses of a pointer...
static Constant * getZExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
User * getUser() const LLVM_READONLY
Returns the User that contains this Use.
bool visit(AllocaSlices::const_iterator I)
A base class for visitors over the uses of a pointer value.
Type * getType() const
All values are typed, get the type of this value.
bool insert(const value_type &X)
Insert a new element into the SetVector.
void copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, LoadInst &NewLI)
Copy a nonnull metadata node to a new load instruction.
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
SmallVectorImpl< Slice >::iterator iterator
Support for iterating over the slices.
const_iterator end() const
A legacy pass for the legacy pass manager that wraps the SROA pass.
Class to represent array types.
This is the common base class for debug info intrinsics for variables.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
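For example (a small sketch; the values are chosen only for illustration):
  APInt V(/*numBits=*/8, 0xFF);   // -1 in 8 bits
  APInt W = V.sextOrTrunc(16);    // sign-extended: 0xFFFF
  APInt N = W.sextOrTrunc(4);     // truncated to the low 4 bits: 0xF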
This class represents a no-op cast from one type to another.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
const APInt & getValue() const
Return the constant as an APInt value reference.
ConstantFolder - Create constants with minimum, target independent, folding.
An instruction for storing to memory.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
void printUse(raw_ostream &OS, const_iterator I, StringRef Indent=" ") const
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
static Constant * getUDiv(Constant *C1, Constant *C2, bool isExact=false)
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Type::subtype_iterator element_iterator
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of the interface.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy)
Test whether we can convert a value from the old to the new type.
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Value * getOperand(unsigned i) const
Class to represent pointers.
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
iterator find(const_arg_type_t< KeyT > Val)
const BasicBlock & getEntryBlock() const
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
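A short sketch of the guarantee (example values are illustrative): the result is the largest power of two dividing both arguments, i.e. the alignment that may still be assumed after combining them:
  uint64_t A1 = MinAlign(16, 8);   // 8
  uint64_t A2 = MinAlign(8, 20);   // 4: an offset of 20 only preserves 4-byte alignment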
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
void getAAMetadata(AAMDNodes &N, bool Merge=false) const
Fills the AAMDNodes structure with AA metadata from this instruction.
Maximum number of bits that can be specified.
iterator_range< partition_iterator > partitions()
initializer< Ty > init(const Ty &Val)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
A set of analyses that are preserved following a run of a transformation pass.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
bool areAllPreserved() const
Test whether all analyses are preserved (and none are abandoned).
void setAAMetadata(const AAMDNodes &N)
Sets the metadata on this instruction from the AAMDNodes structure.
LLVM Basic Block Representation.
The instances of the Type class are immutable: once they are created, they are never changed...
static unsigned getPointerOperandIndex()
This is an important base class in LLVM.
LLVM_NODISCARD bool empty() const
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
ArrayRef< Use * > getDeadOperands() const
Access the dead operands referring to this alloca.
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
bool isPointerTy() const
True if this is an instance of PointerType.
static bool isSafeSelectToSpeculate(SelectInst &SI)
Select instructions that use an alloca and are subsequently loaded can be rewritten to load both input pointers and then select between the result, allowing the load of the alloca to be promoted.
LLVM_NODISCARD size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C or npos if not found.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
static Value * getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, APInt Offset, Type *TargetTy, SmallVectorImpl< Value *> &Indices, Twine NamePrefix)
Get a natural GEP from a base pointer to a particular offset and resulting in a particular type...
element_iterator element_end() const
DIExpression * getExpression() const
Represent the analysis usage information of a pass.
bool isLifetimeStartOrEnd() const
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
static cl::opt< bool > SROAStrictInbounds("sroa-strict-inbounds", cl::init(false), cl::Hidden)
Hidden option to experiment with completely strict handling of inbounds GEPs.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
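For instance, a sketch in the style of this file (assuming a Partition P as defined above):
  bool HasSplittable =
      llvm::any_of(P, [](const Slice &S) { return S.isSplittable(); });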
Analysis pass providing a never-invalidated alias analysis result.
void initializeSROALegacyPassPass(PassRegistry &)
FunctionPass class - This class is used to implement most global optimizations.
Value * getPointerOperand()
unsigned getAddressSpace() const
Return the address space of the Pointer type.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Class to represent integer types.
unsigned getIndexTypeSizeInBits(Type *Ty) const
Layout size of the index used in GEP calculation.
AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass, AllocaInst &OldAI, AllocaInst &NewAI, uint64_t NewAllocaBeginOffset, uint64_t NewAllocaEndOffset, bool IsIntegerPromotable, VectorType *PromotableVecTy, SmallSetVector< PHINode *, 8 > &PHIUsers, SmallSetVector< SelectInst *, 8 > &SelectUsers)
void setAlignment(unsigned Align)
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
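A brief sketch of the usual erase/remove pattern (assuming a SmallVector<Slice, 8> named Slices):
  Slices.erase(llvm::remove_if(Slices,
                               [](const Slice &S) { return S.isDead(); }),
               Slices.end());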
const Value * getCondition() const
static Constant * getAllOnesValue(Type *Ty)
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
iterator erase(const_iterator CI)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
static Value * getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL, Value *BasePtr, Type *Ty, Type *TargetTy, SmallVectorImpl< Value *> &Indices, Twine NamePrefix)
Get a natural GEP off of the BasePtr walking through Ty toward TargetTy without changing the offset of the pointer.
void sort(IteratorTy Start, IteratorTy End)
void printSlice(raw_ostream &OS, const_iterator I, StringRef Indent=" ") const
static Type * stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty)
Strip aggregate type wrapping.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This provides the default implementation of the IRBuilder 'InsertHelper' method that is called whenever an instruction is created by IRBuilder and needs to be inserted.
This is the superclass of the array and vector type classes.
A function analysis which provides an AssumptionCache.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
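For example, the partition walk in this file is exposed as such a range; a hedged sketch assuming an AllocaSlices instance AS:
  for (Partition &P : AS.partitions()) {   // iterator_range<partition_iterator>
    uint64_t Bytes = P.size();             // bytes spanned by this partition
    (void)Bytes;
  }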
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
isPodLike - This is a type trait that is used to determine whether a given type can be copied around with memcpy instead of running ctors etc.
A SetVector that performs no allocations if smaller than a certain size.
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the limit value.
This is the shared class of boolean and integer constants.
TinyPtrVector< DbgVariableIntrinsic * > FindDbgAddrUses(Value *V)
Finds all intrinsics declaring local variables as living in the memory that 'V' points to...
AllocaSlices(const DataLayout &DL, AllocaInst &AI)
Construct the slices of a particular alloca.
Module.h This file contains the declarations for the Module class.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other instructions.
partition_iterator & operator++()
static Value * insertVector(IRBuilderTy &IRB, Value *Old, Value *V, unsigned BeginIndex, const Twine &Name)
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU...
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
A collection of metadata nodes that might be associated with a memory access used by the alias-analysis infrastructure.
LLVM_NODISCARD T pop_back_val()
uint64_t getSizeInBytes() const
SmallVectorImpl< Slice >::const_iterator const_iterator
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
void setPreservesCFG()
This function should be called by the pass iff it does not add or remove basic blocks from the function, and does not modify terminator instructions in any way.
Value * getRawSource() const
Return the arguments to the instruction.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
void setOperand(unsigned i, Value *Val)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
void clear()
Completely clear the SetVector.
A range adaptor for a pair of iterators.
Class to represent vector types.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not belong to a module.
Class for arbitrary precision integers.
static bool isIntegerWideningViableForSlice(const Slice &S, uint64_t AllocBeginOffset, Type *AllocaTy, const DataLayout &DL, bool &WholeAllocaOp)
Test whether a slice of an alloca is valid for integer widening.
iterator_range< user_iterator > users()
Represents analyses that only rely on functions' control flow.
const Value * getFalseValue() const
element_iterator element_begin() const
static Value * insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, Value *V, uint64_t Offset, const Twine &Name)
bool isNonIntegralPointerType(PointerType *PT) const
const_iterator begin() const
static Type * findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, uint64_t EndOffset)
Walk the range of a partitioning looking for a common type to cover this sequence of slices...
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples: an i19 has a size of 19 bits, a store size of 24 bits (3 bytes), and typically an alloc size of 32 bits (4 bytes); the alloc size depends on the target's alignment rules.
static Value * foldPHINodeOrSelectInst(Instruction &I)
A helper that folds a PHI node or a select.
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment padding.
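To contrast the different size queries, a sketch assuming an LLVMContext Ctx and a DataLayout DL (the exact alloc size and alignment depend on the target's data layout):
  Type *I19 = IntegerType::getIntNTy(Ctx, 19);
  uint64_t Bits  = DL.getTypeSizeInBits(I19);   // 19
  uint64_t Store = DL.getTypeStoreSize(I19);    // 3 bytes
  uint64_t Alloc = DL.getTypeAllocSize(I19);    // typically 4 bytes
  unsigned Align = DL.getABITypeAlignment(I19); // typically 4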
bool operator!=(uint64_t V1, const APInt &V2)
INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates", false, false)
INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates", false, false)
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
This class wraps the llvm.memcpy/memmove intrinsics.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
bool isVolatile() const
Return true if this is a store to a volatile memory location.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
uint64_t getElementOffset(unsigned Idx) const
unsigned getAlignment() const
Return the alignment of the access that is being performed.
unsigned getIntegerBitWidth() const
LLVM_NODISCARD bool empty() const
AtomicOrdering getOrdering() const
Returns the ordering constraint of this store instruction.
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value...
Visitor to rewrite instructions using a particular slice of an alloca to use a new alloca.
void preserveSet()
Mark an analysis set as preserved.
StringRef getName() const
Return a constant reference to the value's name.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
static void speculateSelectInstLoads(SelectInst &SI)
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
FunctionPass * createSROAPass()
uint64_t endOffset() const
The end offset of this partition.
Instruction * getEscapingInst() const
Get the instruction causing the pointer to escape.
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
void preserve()
Mark an analysis as preserved.
DILocalVariable * getVariable() const
static Value * extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, unsigned EndIndex, const Twine &Name)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
unsigned getAlignment() const
Return the alignment of the access that is being performed.
std::string str() const
Return the twine contents as a std::string.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exists.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
static unsigned getPointerOperandIndex()
bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1...
user_iterator user_begin()
bool operator<(int64_t V1, const APSInt &V2)
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side effects.
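A minimal sketch of the common cleanup pattern (I is an assumed Instruction pointer; the TargetLibraryInfo may be null):
  if (isInstructionTriviallyDead(I, /*TLI=*/nullptr))
    I->eraseFromParent();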
LLVM Value Representation.
void setAlignment(unsigned Align)
static Value * buildGEP(IRBuilderTy &IRB, Value *BasePtr, SmallVectorImpl< Value *> &Indices, Twine NamePrefix)
Build a GEP out of a base pointer and indices.
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
static bool isSafePHIToSpeculate(PHINode &PN)
PHI instructions that use an alloca and are subsequently loaded can be rewritten to load both input pointers in the predecessor blocks and then PHI the results, allowing the load of the alloca to be promoted.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct a VectorType.
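A small sketch constructing the aggregate and vector types this pass reasons about (assuming an LLVMContext Ctx):
  Type *I8  = Type::getInt8Ty(Ctx);
  Type *F32 = Type::getFloatTy(Ctx);
  ArrayType  *A = ArrayType::get(I8, 16);     // [16 x i8]
  VectorType *V = VectorType::get(F32, 4);    // <4 x float>
  StructType *S = StructType::get(Ctx, {I8, V}, /*isPacked=*/false);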
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, BasicBlock::iterator InsertPt) const
static void speculatePHINodeLoads(PHINode &PN)
This class implements an extremely fast bulk output stream that can only output to a stream...
void PromoteMemToReg(ArrayRef< AllocaInst *> Allocas, DominatorTree &DT, AssumptionCache *AC=nullptr)
Promote the specified list of alloca instructions into scalar registers, inserting PHI nodes as appropriate.
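A hedged sketch of the final promotion step (the DominatorTree DT, AssumptionCache AC, and the vector of proven-promotable allocas are assumed to have been computed earlier):
  std::vector<AllocaInst *> PromotableAllocas;   // filled during rewriting
  if (!PromotableAllocas.empty()) {
    PromoteMemToReg(PromotableAllocas, DT, &AC);
    PromotableAllocas.clear();
  }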
PtrInfo visitPtr(Instruction &I)
Recursively visit the uses of the given pointer.
An optimization pass providing Scalar Replacement of Aggregates.
Type * getElementType() const
static cl::opt< bool > SROARandomShuffleSlices("sroa-random-shuffle-slices", cl::init(false), cl::Hidden)
Hidden option to enable randomly shuffling the slices to help uncover instability in their order...
uint64_t size() const
The size of the partition.
bool hasOneUse() const
Return true if there is exactly one user of this value.
bool empty() const
Test whether this partition contains no slices, and merely spans a region occupied by split slices...
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
void setSourceAlignment(unsigned Align)
A container for analyses that lazily runs them and caches their results.
void print(raw_ostream &OS, const_iterator I, StringRef Indent=" ") const
Type * getArrayElementType() const
Legacy analysis pass which computes a DominatorTree.
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, const DataLayout &DL)
Test whether the given alloca partition's integer operations can be widened to promotable ones...
bool operator==(uint64_t V1, const APInt &V2)
#define LLVM_ATTRIBUTE_UNUSED
This header defines various interfaces for pass management in LLVM.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Value * getPointerOperand()
static IntegerType * getInt8Ty(LLVMContext &C)
Value * getRawDest() const
static Constant * get(ArrayRef< Constant *> V)
Type * getElementType() const
const Use & getRawDestUse() const
void setDestAlignment(unsigned Align)
int64_t getIndexedOffsetInType(Type *ElemTy, ArrayRef< Value *> Indices) const
Returns the offset from the beginning of the type for the specified indices.
bool isArrayTy() const
True if this is an instance of ArrayType.
static Optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multiple Values.
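For instance, a sketch describing the upper half of a split 64-bit variable (Expr is an assumed DIExpression pointer; the offsets are purely illustrative):
  if (Optional<DIExpression *> Frag =
          DIExpression::createFragmentExpression(Expr, /*OffsetInBits=*/32,
                                                 /*SizeInBits=*/32)) {
    DIExpression *UpperHalf = *Frag;   // attach to the dbg intrinsic of the new alloca
    (void)UpperHalf;
  }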
A wrapper class for inspecting calls to intrinsic functions.
const BasicBlock * getParent() const
an instruction to allocate memory on the stack
gep_type_iterator gep_type_begin(const User *GEP)
static Value * foldSelectInst(SelectInst &SI)
ArrayRef< Instruction * > getDeadUsers() const
Access the dead users for this alloca.
bool isEscaped() const
Is the pointer escaped at some point?
bool operator==(const partition_iterator &RHS) const