72 #define DEBUG_TYPE "loop-accesses" 76 cl::desc(
"Sets the SIMD width. Zero is autoselect."),
82 cl::desc(
"Sets the vectorization interleave count. " 83 "Zero is autoselect."),
90 cl::desc(
"When performing memory disambiguation checks at runtime do not " 91 "generate more than this number of comparisons (default = 8)."),
98 cl::desc(
"Maximum number of comparisons done when trying to merge " 99 "runtime memory checks. (default = 100)"),
108 cl::desc(
"Maximum number of dependences collected by " 109 "loop-access analysis (default = 100)"),
125 cl::desc(
"Enable symbolic stride memory access versioning"));
130 "store-to-load-forwarding-conflict-detection",
cl::Hidden,
131 cl::desc(
"Enable conflict detection in loop-access analysis"),
139 if (
auto *CI = dyn_cast<CastInst>(V))
140 if (CI->getOperand(0)->getType()->isIntegerTy())
141 return CI->getOperand(0);
153 PtrToStride.
find(OrigPtr ? OrigPtr : Ptr);
154 if (SI != PtrToStride.
end()) {
155 Value *StrideVal = SI->second;
161 const auto *U = cast<SCEVUnknown>(SE->
getSCEV(StrideVal));
169 <<
" by: " << *Expr <<
"\n");
191 unsigned DepSetId,
unsigned ASId,
202 ScStart = ScEnd = Sc;
205 assert(AR &&
"Invalid addrec expression");
214 if (
const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
215 if (CStep->getValue()->isNegative())
231 Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
235 RuntimePointerChecking::generateChecks()
const {
238 for (
unsigned I = 0;
I < CheckingGroups.size(); ++
I) {
239 for (
unsigned J =
I + 1; J < CheckingGroups.size(); ++J) {
243 if (needsChecking(CGI, CGJ))
244 Checks.
push_back(std::make_pair(&CGI, &CGJ));
250 void RuntimePointerChecking::generateChecks(
252 assert(Checks.empty() &&
"Checks is not empty");
253 groupChecks(DepCands, UseDependencies);
254 Checks = generateChecks();
260 for (
unsigned J = 0, EJ = N.
Members.
size(); EJ != J; ++J)
281 const SCEV *Start = RtCheck.Pointers[
Index].Start;
282 const SCEV *End = RtCheck.Pointers[
Index].End;
303 Members.push_back(Index);
307 void RuntimePointerChecking::groupChecks(
326 CheckingGroups.clear();
353 if (!UseDependencies) {
354 for (
unsigned I = 0;
I < Pointers.size(); ++
I)
359 unsigned TotalComparisons = 0;
363 PositionMap[Pointers[
Index].PointerValue] =
Index;
372 for (
unsigned I = 0;
I < Pointers.size(); ++
I) {
379 Pointers[
I].IsWritePtr);
391 unsigned Pointer = PositionMap[
MI->getPointer()];
408 if (Group.addPointer(Pointer)) {
423 llvm::copy(Groups, std::back_inserter(CheckingGroups));
430 return (PtrToPartition[PtrIdx1] != -1 &&
431 PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
455 unsigned Depth)
const {
457 for (
const auto &
Check : Checks) {
458 const auto &First =
Check.first->Members, &Second =
Check.second->Members;
460 OS.
indent(Depth) <<
"Check " << N++ <<
":\n";
462 OS.
indent(Depth + 2) <<
"Comparing group (" <<
Check.first <<
"):\n";
463 for (
unsigned K = 0; K < First.size(); ++K)
464 OS.
indent(Depth + 2) << *Pointers[First[K]].PointerValue <<
"\n";
466 OS.
indent(Depth + 2) <<
"Against group (" <<
Check.second <<
"):\n";
467 for (
unsigned K = 0; K < Second.size(); ++K)
468 OS.
indent(Depth + 2) << *Pointers[Second[K]].PointerValue <<
"\n";
474 OS.
indent(Depth) <<
"Run-time memory checks:\n";
475 printChecks(OS, Checks, Depth);
477 OS.
indent(Depth) <<
"Grouped accesses:\n";
478 for (
unsigned I = 0;
I < CheckingGroups.size(); ++
I) {
479 const auto &CG = CheckingGroups[
I];
481 OS.
indent(Depth + 2) <<
"Group " << &CG <<
":\n";
482 OS.
indent(Depth + 4) <<
"(Low: " << *CG.Low <<
" High: " << *CG.High
484 for (
unsigned J = 0; J < CG.Members.size(); ++J) {
485 OS.
indent(Depth + 6) <<
"Member: " << *Pointers[CG.Members[J]].Expr
497 class AccessAnalysis {
506 : DL(Dl), TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA),
507 IsRTCheckAnalysisNeeded(
false), PSE(PSE) {}
513 Accesses.insert(MemAccessInfo(Ptr,
false));
515 ReadOnlyPtr.insert(Ptr);
522 Accesses.insert(MemAccessInfo(Ptr,
true));
533 MemAccessInfo Access,
536 Loop *TheLoop,
unsigned &RunningDepId,
537 unsigned ASId,
bool ShouldCheckStride,
547 bool ShouldCheckWrap =
false);
551 void buildDependenceSets() {
552 processMemAccesses();
560 bool isDependencyCheckNeeded() {
return !CheckDeps.empty(); }
568 MemAccessInfoList &getDependenciesToCheck() {
return CheckDeps; }
575 void processMemAccesses();
578 PtrAccessSet Accesses;
586 MemAccessInfoList CheckDeps;
609 bool IsRTCheckAnalysisNeeded;
622 Loop *L,
bool Assume) {
655 MemAccessInfo Access,
658 Loop *TheLoop,
unsigned &RunningDepId,
659 unsigned ASId,
bool ShouldCheckWrap,
661 Value *Ptr = Access.getPointer();
668 if (ShouldCheckWrap && !
isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
669 auto *Expr = PSE.getSCEV(Ptr);
670 if (!Assume || !isa<SCEVAddRecExpr>(Expr))
678 if (isDependencyCheckNeeded()) {
680 unsigned &LeaderId = DepSetId[Leader];
682 LeaderId = RunningDepId++;
686 DepId = RunningDepId++;
688 bool IsWrite = Access.getInt();
689 RtCheck.
insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
690 LLVM_DEBUG(
dbgs() <<
"LAA: Found a runtime check ptr:" << *Ptr <<
'\n');
698 bool ShouldCheckWrap) {
703 bool NeedRTCheck =
false;
704 if (!IsRTCheckAnalysisNeeded)
return true;
706 bool IsDepCheckNeeded = isDependencyCheckNeeded();
711 for (
auto &AS : AST) {
712 int NumReadPtrChecks = 0;
713 int NumWritePtrChecks = 0;
714 bool CanDoAliasSetRT =
true;
718 unsigned RunningDepId = 1;
724 Value *Ptr = A.getValue();
725 bool IsWrite = Accesses.count(MemAccessInfo(Ptr,
true));
726 MemAccessInfo Access(Ptr, IsWrite);
733 if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
734 RunningDepId, ASId, ShouldCheckWrap,
false)) {
735 LLVM_DEBUG(
dbgs() <<
"LAA: Can't find bounds for ptr:" << *Ptr <<
'\n');
737 CanDoAliasSetRT =
false;
749 bool NeedsAliasSetRTCheck =
false;
750 if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2))
751 NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 ||
752 (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1));
756 if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
760 CanDoAliasSetRT =
true;
761 for (
auto Access : Retries)
762 if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
763 TheLoop, RunningDepId, ASId,
764 ShouldCheckWrap,
true)) {
765 CanDoAliasSetRT =
false;
770 CanDoRT &= CanDoAliasSetRT;
771 NeedRTCheck |= NeedsAliasSetRTCheck;
780 unsigned NumPointers = RtCheck.
Pointers.size();
781 for (
unsigned i = 0; i < NumPointers; ++i) {
782 for (
unsigned j = i + 1; j < NumPointers; ++j) {
784 if (RtCheck.
Pointers[i].DependencySetId ==
785 RtCheck.
Pointers[j].DependencySetId)
798 dbgs() <<
"LAA: Runtime check would require comparison between" 799 " different address spaces\n");
805 if (NeedRTCheck && CanDoRT)
809 <<
" pointer comparisons.\n");
811 RtCheck.
Need = NeedRTCheck;
813 bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
814 if (!CanDoRTIfNeeded)
816 return CanDoRTIfNeeded;
819 void AccessAnalysis::processMemAccesses() {
826 LLVM_DEBUG(
dbgs() <<
"LAA: Accesses(" << Accesses.size() <<
"):\n");
828 for (
auto A : Accesses)
829 dbgs() <<
"\t" << *A.getPointer() <<
" (" <<
830 (A.getInt() ?
"write" : (ReadOnlyPtr.count(A.getPointer()) ?
831 "read-only" :
"read")) <<
")\n";
838 for (
auto &AS : AST) {
843 bool SetHasWrite =
false;
847 UnderlyingObjToAccessMap ObjToLastAccess;
850 PtrAccessSet DeferredAccesses;
854 for (
int SetIteration = 0; SetIteration < 2; ++SetIteration) {
855 bool UseDeferred = SetIteration > 0;
856 PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
859 Value *Ptr = AV.getValue();
864 if (AC.getPointer() != Ptr)
867 bool IsWrite = AC.getInt();
871 bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
872 if (UseDeferred && !IsReadOnlyPtr)
876 assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
877 S.count(MemAccessInfo(Ptr,
false))) &&
878 "Alias-set pointer not in the access set?");
880 MemAccessInfo Access(Ptr, IsWrite);
888 if (!UseDeferred && IsReadOnlyPtr) {
889 DeferredAccesses.insert(Access);
897 if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
898 CheckDeps.push_back(Access);
899 IsRTCheckAnalysisNeeded =
true;
908 ValueVector TempObjects;
912 <<
"Underlying objects for pointer " << *Ptr <<
"\n");
913 for (
Value *UnderlyingObj : TempObjects) {
916 if (isa<ConstantPointerNull>(UnderlyingObj) &&
922 UnderlyingObjToAccessMap::iterator Prev =
923 ObjToLastAccess.find(UnderlyingObj);
924 if (Prev != ObjToLastAccess.end())
925 DepCands.
unionSets(Access, Prev->second);
927 ObjToLastAccess[UnderlyingObj] = Access;
938 return GEP->isInBounds();
959 if (!
GEP || !
GEP->isInBounds())
963 Value *NonConstIndex =
nullptr;
965 if (!isa<ConstantInt>(
Index)) {
968 NonConstIndex =
Index;
976 if (
auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
977 if (OBO->hasNoSignedWrap() &&
980 isa<ConstantInt>(OBO->getOperand(1))) {
981 auto *OpScev = PSE.
getSCEV(OBO->getOperand(0));
983 if (
auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
984 return OpAR->getLoop() == L && OpAR->getNoWrapFlags(
SCEV::FlagNSW);
993 bool Assume,
bool ShouldCheckWrap) {
998 auto *PtrTy = cast<PointerType>(Ty);
999 if (PtrTy->getElementType()->isAggregateType()) {
1000 LLVM_DEBUG(
dbgs() <<
"LAA: Bad stride - Not a pointer to a scalar type" 1012 LLVM_DEBUG(
dbgs() <<
"LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
1013 <<
" SCEV: " << *PtrScev <<
"\n");
1019 LLVM_DEBUG(
dbgs() <<
"LAA: Bad stride - Not striding over innermost loop " 1020 << *Ptr <<
" SCEV: " << *AR <<
"\n");
1032 bool IsNoWrapAddRec = !ShouldCheckWrap ||
1035 if (!IsNoWrapAddRec && !IsInBoundsGEP &&
1040 IsNoWrapAddRec =
true;
1041 LLVM_DEBUG(
dbgs() <<
"LAA: Pointer may wrap in the address space:\n" 1042 <<
"LAA: Pointer: " << *Ptr <<
"\n" 1043 <<
"LAA: SCEV: " << *AR <<
"\n" 1044 <<
"LAA: Added an overflow assumption\n");
1047 dbgs() <<
"LAA: Bad stride - Pointer may wrap in the address space " 1048 << *Ptr <<
" SCEV: " << *AR <<
"\n");
1059 LLVM_DEBUG(
dbgs() <<
"LAA: Bad stride - Not a constant strided " << *Ptr
1060 <<
" SCEV: " << *AR <<
"\n");
1065 int64_t
Size = DL.getTypeAllocSize(PtrTy->getElementType());
1075 int64_t Stride = StepVal /
Size;
1076 int64_t Rem = StepVal %
Size;
1083 if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
1088 LLVM_DEBUG(
dbgs() <<
"LAA: Non unit strided pointer which is not either " 1089 <<
"inbouds or in address space 0 may wrap:\n" 1090 <<
"LAA: Pointer: " << *Ptr <<
"\n" 1091 <<
"LAA: SCEV: " << *AR <<
"\n" 1092 <<
"LAA: Added an overflow assumption\n");
1106 "Expected list of pointer operands.");
1112 Value *Ptr0 = VL[0];
1117 for (
auto *Ptr : VL) {
1120 if (Ptr->getType()->getPointerAddressSpace() !=
1126 if (CurrObj != Obj0)
1138 int64_t
Offset = Diff->getAPInt().getSExtValue();
1139 if (!Offsets.
insert(Offset).second)
1143 SortedIndices.
clear();
1144 SortedIndices.
resize(VL.size());
1145 std::iota(SortedIndices.
begin(), SortedIndices.
end(), 0);
1148 std::stable_sort(SortedIndices.
begin(), SortedIndices.
end(),
1149 [&OffValPairs](
unsigned Left,
unsigned Right) {
1150 return OffValPairs[
Left].first < OffValPairs[
Right].first;
1154 if (
llvm::all_of(SortedIndices, [&SortedIndices](
const unsigned I) {
1155 return I == SortedIndices[
I];
1157 SortedIndices.
clear();
1165 if (
LoadInst *L = dyn_cast<LoadInst>(I))
1166 return L->getPointerAddressSpace();
1167 if (
StoreInst *S = dyn_cast<StoreInst>(I))
1168 return S->getPointerAddressSpace();
1181 if (!PtrA || !PtrB || (ASA != ASB))
1193 Type *Ty = cast<PointerType>(PtrA->
getType())->getElementType();
1196 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1209 return OffsetDelta ==
Size;
1221 return X == PtrSCEVB;
1229 case BackwardVectorizable:
1230 return VectorizationSafetyStatus::Safe;
1233 return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
1234 case ForwardButPreventsForwarding:
1236 case BackwardVectorizableButPreventsForwarding:
1237 return VectorizationSafetyStatus::Unsafe;
1246 case ForwardButPreventsForwarding:
1250 case BackwardVectorizable:
1252 case BackwardVectorizableButPreventsForwarding:
1265 case ForwardButPreventsForwarding:
1270 case BackwardVectorizable:
1272 case BackwardVectorizableButPreventsForwarding:
1278 bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1279 uint64_t TypeByteSize) {
1292 const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1294 uint64_t MaxVFWithoutSLForwardIssues = std::min(
1295 VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
1298 for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
1302 if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1303 MaxVFWithoutSLForwardIssues = (VF >>= 1);
1308 if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
1310 dbgs() <<
"LAA: Distance " << Distance
1311 <<
" that could cause a store-load forwarding conflict\n");
1315 if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
1316 MaxVFWithoutSLForwardIssues !=
1317 VectorizerParams::MaxVectorWidth * TypeByteSize)
1318 MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
1340 const SCEV &BackedgeTakenCount,
1341 const SCEV &Dist, uint64_t Stride,
1342 uint64_t TypeByteSize) {
1361 const uint64_t ByteStride = Stride * TypeByteSize;
1365 const SCEV *CastedDist = &Dist;
1366 const SCEV *CastedProduct = Product;
1373 if (DistTypeSize > ProductTypeSize)
1400 uint64_t TypeByteSize) {
1401 assert(Stride > 1 &&
"The stride must be greater than 1");
1402 assert(TypeByteSize > 0 &&
"The type size in byte must be non-zero");
1403 assert(Distance > 0 &&
"The distance must be non-zero");
1406 if (Distance % TypeByteSize)
1409 uint64_t ScaledDist = Distance / TypeByteSize;
1427 return ScaledDist % Stride;
1431 MemoryDepChecker::isDependent(
const MemAccessInfo &A,
unsigned AIdx,
1432 const MemAccessInfo &
B,
unsigned BIdx,
1434 assert (AIdx < BIdx &&
"Must pass arguments in program order");
1436 Value *APtr = A.getPointer();
1437 Value *BPtr = B.getPointer();
1438 bool AIsWrite = A.getInt();
1439 bool BIsWrite = B.getInt();
1442 if (!AIsWrite && !BIsWrite)
1443 return Dependence::NoDep;
1450 int64_t StrideAPtr =
getPtrStride(PSE, APtr, InnermostLoop, Strides,
true);
1451 int64_t StrideBPtr =
getPtrStride(PSE, BPtr, InnermostLoop, Strides,
true);
1453 const SCEV *Src = PSE.getSCEV(APtr);
1454 const SCEV *
Sink = PSE.getSCEV(BPtr);
1458 if (StrideAPtr < 0) {
1466 const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
1468 LLVM_DEBUG(
dbgs() <<
"LAA: Src Scev: " << *Src <<
"Sink Scev: " << *Sink
1469 <<
"(Induction step: " << StrideAPtr <<
")\n");
1470 LLVM_DEBUG(
dbgs() <<
"LAA: Distance for " << *InstMap[AIdx] <<
" to " 1471 << *InstMap[BIdx] <<
": " << *Dist <<
"\n");
1476 if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
1477 LLVM_DEBUG(
dbgs() <<
"Pointer access with non-constant stride\n");
1483 auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
1484 uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
1485 uint64_t Stride =
std::abs(StrideAPtr);
1488 if (TypeByteSize == DL.getTypeAllocSize(BTy) &&
1490 *(PSE.getBackedgeTakenCount()), *Dist, Stride,
1492 return Dependence::NoDep;
1494 LLVM_DEBUG(
dbgs() <<
"LAA: Dependence because of non-constant distance\n");
1495 FoundNonConstantDistanceDependence =
true;
1503 if (
std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
1506 return Dependence::NoDep;
1511 bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
1513 (couldPreventStoreLoadForward(Val.
abs().
getZExtValue(), TypeByteSize) ||
1515 LLVM_DEBUG(
dbgs() <<
"LAA: Forward but may prevent st->ld forwarding\n");
1516 return Dependence::ForwardButPreventsForwarding;
1520 return Dependence::Forward;
1527 return Dependence::Forward;
1529 dbgs() <<
"LAA: Zero dependence difference but different types\n");
1538 <<
"LAA: ReadWrite-Write positive dependency with different types\n");
1543 unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
1545 unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
1548 unsigned MinNumIter =
std::max(ForcedFactor * ForcedUnroll, 2U);
1576 uint64_t MinDistanceNeeded =
1577 TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1578 if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1579 LLVM_DEBUG(
dbgs() <<
"LAA: Failure because of positive distance " 1580 << Distance <<
'\n');
1581 return Dependence::Backward;
1585 if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1587 << MinDistanceNeeded <<
" size in bytes");
1588 return Dependence::Backward;
1607 MaxSafeDepDistBytes =
1608 std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
1610 bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
1612 couldPreventStoreLoadForward(Distance, TypeByteSize))
1613 return Dependence::BackwardVectorizableButPreventsForwarding;
1615 uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
1617 <<
" with max VF = " << MaxVF <<
'\n');
1618 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1619 MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits);
1620 return Dependence::BackwardVectorizable;
1627 MaxSafeDepDistBytes = -1;
1630 if (Visited.
count(CurAccess))
1649 for (std::vector<unsigned>::iterator I1 = Accesses[*AI].
begin(),
1650 I1E = Accesses[*AI].
end(); I1 != I1E; ++I1)
1651 for (std::vector<unsigned>::iterator I2 = Accesses[*OI].
begin(),
1652 I2E = Accesses[*OI].
end(); I2 != I2E; ++I2) {
1653 auto A = std::make_pair(&*AI, *I1);
1654 auto B = std::make_pair(&*OI, *I2);
1661 isDependent(*A.first, A.second, *B.first, B.second, Strides);
1662 mergeInStatus(Dependence::isSafeForVectorization(Type));
1668 if (RecordDependences) {
1669 if (Type != Dependence::NoDep)
1670 Dependences.push_back(
Dependence(A.second, B.second, Type));
1673 RecordDependences =
false;
1674 Dependences.clear();
1676 <<
"Too many dependences, stopped recording\n");
1679 if (!RecordDependences && !isSafeForVectorization())
1688 LLVM_DEBUG(
dbgs() <<
"Total Dependences: " << Dependences.size() <<
"\n");
1689 return isSafeForVectorization();
1695 auto &IndexVector = Accesses.find(Access)->second;
1699 std::back_inserter(Insts),
1700 [&](
unsigned Idx) {
return this->InstMap[Idx]; });
1705 "NoDep",
"Unknown",
"Forward",
"ForwardButPreventsForwarding",
"Backward",
1706 "BackwardVectorizable",
"BackwardVectorizableButPreventsForwarding"};
1713 OS.
indent(Depth + 2) << *Instrs[Destination] <<
"\n";
1716 bool LoopAccessInfo::canAnalyzeLoop() {
1723 if (!TheLoop->
empty()) {
1725 recordAnalysis(
"NotInnerMostLoop") <<
"loop is not the innermost loop";
1732 dbgs() <<
"LAA: loop control flow is not understood by analyzer\n");
1733 recordAnalysis(
"CFGNotUnderstood")
1734 <<
"loop control flow is not understood by analyzer";
1741 dbgs() <<
"LAA: loop control flow is not understood by analyzer\n");
1742 recordAnalysis(
"CFGNotUnderstood")
1743 <<
"loop control flow is not understood by analyzer";
1752 dbgs() <<
"LAA: loop control flow is not understood by analyzer\n");
1753 recordAnalysis(
"CFGNotUnderstood")
1754 <<
"loop control flow is not understood by analyzer";
1759 const SCEV *ExitCount = PSE->getBackedgeTakenCount();
1760 if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
1761 recordAnalysis(
"CantComputeNumberOfIterations")
1762 <<
"could not determine number of loop iterations";
1763 LLVM_DEBUG(
dbgs() <<
"LAA: SCEV could not compute the loop exit count.\n");
1780 unsigned NumReads = 0;
1781 unsigned NumReadWrites = 0;
1783 PtrRtChecking->Pointers.
clear();
1784 PtrRtChecking->Need =
false;
1795 if (
I.mayReadFromMemory()) {
1805 if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
1810 if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
1811 recordAnalysis(
"NonSimpleLoad", Ld)
1812 <<
"read with atomic ordering or volatile read";
1819 DepChecker->addAccess(Ld);
1821 collectStridedAccess(Ld);
1826 if (
I.mayWriteToMemory()) {
1829 recordAnalysis(
"CantVectorizeInstruction", St)
1830 <<
"instruction cannot be vectorized";
1834 if (!St->isSimple() && !IsAnnotatedParallel) {
1835 recordAnalysis(
"NonSimpleStore", St)
1836 <<
"write with atomic ordering or volatile write";
1843 DepChecker->addAccess(St);
1845 collectStridedAccess(St);
1855 if (!Stores.
size()) {
1863 TheLoop, AA, LI, DependentAccesses, *PSE);
1874 ValueSet UniformStores;
1877 Value *Ptr =
ST->getPointerOperand();
1880 HasDependenceInvolvingLoopInvariantAddress |=
1881 !UniformStores.insert(Ptr).second;
1885 if (Seen.insert(Ptr).second) {
1892 if (blockNeedsPredication(
ST->getParent(), TheLoop, DT))
1895 Accesses.addStore(Loc);
1899 if (IsAnnotatedParallel) {
1901 dbgs() <<
"LAA: A loop annotated parallel, ignore memory dependency " 1908 Value *Ptr =
LD->getPointerOperand();
1917 bool IsReadOnlyPtr =
false;
1918 if (Seen.insert(Ptr).second ||
1921 IsReadOnlyPtr =
true;
1926 if (UniformStores.count(Ptr)) {
1927 LLVM_DEBUG(
dbgs() <<
"LAA: Found an unsafe dependency between a uniform " 1928 "load and uniform store to the same address!\n");
1929 HasDependenceInvolvingLoopInvariantAddress =
true;
1936 if (blockNeedsPredication(
LD->getParent(), TheLoop, DT))
1939 Accesses.addLoad(Loc, IsReadOnlyPtr);
1944 if (NumReadWrites == 1 && NumReads == 0) {
1952 Accesses.buildDependenceSets();
1956 bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
1957 TheLoop, SymbolicStrides);
1958 if (!CanDoRTIfNeeded) {
1959 recordAnalysis(
"CantIdentifyArrayBounds") <<
"cannot identify array bounds";
1960 LLVM_DEBUG(
dbgs() <<
"LAA: We can't vectorize because we can't find " 1961 <<
"the array bounds.\n");
1967 dbgs() <<
"LAA: We can perform a memory runtime check if needed.\n");
1970 if (Accesses.isDependencyCheckNeeded()) {
1972 CanVecMem = DepChecker->areDepsSafe(
1973 DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
1974 MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
1976 if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
1980 Accesses.resetDepChecks(*DepChecker);
1982 PtrRtChecking->reset();
1983 PtrRtChecking->Need =
true;
1985 auto *SE = PSE->getSE();
1986 CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
1987 SymbolicStrides,
true);
1990 if (!CanDoRTIfNeeded) {
1991 recordAnalysis(
"CantCheckMemDepsAtRunTime")
1992 <<
"cannot check memory dependencies at runtime";
1993 LLVM_DEBUG(
dbgs() <<
"LAA: Can't vectorize with memory checks\n");
2004 dbgs() <<
"LAA: No unsafe dependent memory operations in loop. We" 2005 << (PtrRtChecking->Need ?
"" :
" don't")
2006 <<
" need runtime memory checks.\n");
2008 recordAnalysis(
"UnsafeMemDep")
2009 <<
"unsafe dependent memory operations in loop. Use " 2010 "#pragma loop distribute(enable) to allow loop distribution " 2011 "to attempt to isolate the offending operations into a separate " 2013 LLVM_DEBUG(
dbgs() <<
"LAA: unsafe dependent memory operations in loop\n");
2028 assert(!Report &&
"Multiple reports generated");
2041 Report = make_unique<OptimizationRemarkAnalysis>(
DEBUG_TYPE, RemarkName, DL,
2047 auto *SE = PSE->getSE();
2074 struct PointerBounds {
2083 static PointerBounds
2097 LLVM_DEBUG(
dbgs() <<
"LAA: Adding RT check for a loop invariant ptr:" 2108 return {NewPtr, NewPtrPlusOne};
2110 Value *Start =
nullptr, *End =
nullptr;
2116 return {Start, End};
2131 PointerChecks, std::back_inserter(ChecksWithBounds),
2134 First =
expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
2135 Second =
expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
2136 return std::make_pair(First, Second);
2139 return ChecksWithBounds;
2147 auto *SE = PSE->getSE();
2149 auto ExpandedChecks =
2150 expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, *PtrRtChecking);
2156 Value *MemoryRuntimeCheck =
nullptr;
2158 for (
const auto &
Check : ExpandedChecks) {
2159 const PointerBounds &A =
Check.first, &B =
Check.second;
2162 unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
2163 unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
2165 assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
2166 (AS1 == A.End->getType()->getPointerAddressSpace()) &&
2167 "Trying to bounds check pointers with different address spaces");
2189 Value *IsConflict = ChkBuilder.
CreateAnd(Cmp0, Cmp1,
"found.conflict");
2191 if (MemoryRuntimeCheck) {
2193 ChkBuilder.
CreateOr(MemoryRuntimeCheck, IsConflict,
"conflict.rdx");
2196 MemoryRuntimeCheck = IsConflict;
2199 if (!MemoryRuntimeCheck)
2200 return std::make_pair(
nullptr,
nullptr);
2207 ChkBuilder.
Insert(Check,
"memcheck.conflict");
2209 return std::make_pair(FirstInst, Check);
2212 std::pair<Instruction *, Instruction *>
2214 if (!PtrRtChecking->Need)
2215 return std::make_pair(
nullptr,
nullptr);
2217 return addRuntimeChecks(Loc, PtrRtChecking->getChecks());
2220 void LoopAccessInfo::collectStridedAccess(
Value *MemAccess) {
2221 Value *Ptr =
nullptr;
2222 if (
LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
2223 Ptr = LI->getPointerOperand();
2224 else if (
StoreInst *
SI = dyn_cast<StoreInst>(MemAccess))
2225 Ptr =
SI->getPointerOperand();
2233 LLVM_DEBUG(
dbgs() <<
"LAA: Found a strided access that is a candidate for " 2235 LLVM_DEBUG(
dbgs() <<
" Ptr: " << *Ptr <<
" Stride: " << *Stride <<
"\n");
2250 const SCEV *StrideExpr = PSE->getSCEV(Stride);
2251 const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
2259 const SCEV *CastedStride = StrideExpr;
2260 const SCEV *CastedBECount = BETakenCount;
2262 if (BETypeSize >= StrideTypeSize)
2266 const SCEV *StrideMinusBETaken = SE->
getMinusSCEV(CastedStride, CastedBECount);
2272 dbgs() <<
"LAA: Stride>=TripCount; No point in versioning as the " 2273 "Stride==1 predicate will imply that the loop executes " 2277 LLVM_DEBUG(
dbgs() <<
"LAA: Found a strided access that we can version.");
2279 SymbolicStrides[Ptr] = Stride;
2280 StrideSet.insert(Stride);
2289 NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(
false),
2290 HasDependenceInvolvingLoopInvariantAddress(
false) {
2291 if (canAnalyzeLoop())
2292 analyzeLoop(AA, LI, TLI, DT);
2297 OS.
indent(Depth) <<
"Memory dependences are safe";
2298 if (MaxSafeDepDistBytes != -1ULL)
2299 OS <<
" with a maximum dependence distance of " << MaxSafeDepDistBytes
2301 if (PtrRtChecking->Need)
2302 OS <<
" with run-time checks";
2307 OS.
indent(Depth) <<
"Report: " << Report->getMsg() <<
"\n";
2309 if (
auto *Dependences = DepChecker->getDependences()) {
2310 OS.
indent(Depth) <<
"Dependences:\n";
2311 for (
auto &Dep : *Dependences) {
2312 Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2316 OS.
indent(Depth) <<
"Too many dependences, not recorded\n";
2319 PtrRtChecking->print(OS, Depth);
2322 OS.
indent(Depth) <<
"Non vectorizable stores to invariant address were " 2323 << (HasDependenceInvolvingLoopInvariantAddress ?
"" :
"not ")
2324 <<
"found in loop.\n";
2326 OS.
indent(Depth) <<
"SCEV assumptions:\n";
2327 PSE->getUnionPredicate().print(OS, Depth);
2331 OS.
indent(Depth) <<
"Expressions re-written:\n";
2332 PSE->print(OS, Depth);
2336 auto &LAI = LoopAccessInfoMap[L];
2339 LAI = llvm::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
2347 for (
Loop *TopLevelLoop : *LI)
2349 OS.
indent(2) << L->getHeader()->getName() <<
":\n";
2356 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2357 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2358 TLI = TLIP ? &TLIP->getTLI() :
nullptr;
2359 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2360 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2361 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2377 #define LAA_NAME "loop-accesses" 2390 return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV &BackedgeTakenCount, const SCEV &Dist, uint64_t Stride, uint64_t TypeByteSize)
Given a non-constant (unknown) dependence-distance Dist between two memory accesses, that have the same stride whose absolute value is given in Stride, and that have the same type size TypeByteSize, in a loop whose takenCount is BackedgeTakenCount, check if it is possible to prove statically that the dependence distance is larger than the range that the accesses will travel through the execution of the loop.
APInt abs() const
Get the absolute value;.
Pass interface - Implemented by all 'passes'.
static unsigned RuntimeMemoryCheckThreshold
performing memory disambiguation checks at runtime do not make more than this number of comparisons...
static bool Check(DecodeStatus &Out, DecodeStatus In)
static const char laa_name[]
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
static cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))
A parsed version of the target data layout string in and methods for querying it. ...
const_iterator end(StringRef path)
Get end iterator over path.
void printChecks(raw_ostream &OS, const SmallVectorImpl< PointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Proves that V doesn't overflow by adding SCEV predicate.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
uint64_t getZExtValue() const
Get zero extended value.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
bool isBackward() const
Lexically backward dependence.
const SCEV * getConstant(ConstantInt *V)
static bool isInBoundsGep(Value *Ptr)
This class represents lattice values for constants.
static cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100))
The maximum iterations used to merge memory checks.
MDNode * TBAA
The tag for type-based alias analysis.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
A Module instance is used to store all the information related to an LLVM module. ...
static constexpr LocationSize unknown()
const ElemTy & getLeaderValue(const ElemTy &V) const
getLeaderValue - Return the leader for the specified value that is in the set.
void push_back(const T &Elt)
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
This class represents a function call, abstracting a target machine's calling convention.
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
bool IsWritePtr
Holds the information if this pointer is used for writing to memory.
LLVMContext & getContext() const
All values hold a context through their type.
void reset()
Reset the state of the pointer runtime information.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
void print(raw_ostream &OS, unsigned Depth=0) const
Print the list run-time memory checks necessary.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
block Block Frequency true
An instruction for reading from memory.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
std::enable_if<!std::is_array< T >::value, std::unique_ptr< T > >::type make_unique(Args &&... args)
Constructs a new T() with the given args and returns a unique_ptr<T> which owns the object...
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
static unsigned VectorizationFactor
VF as overridden by the user.
void reserve(size_type N)
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize)
Check the dependence for two accesses with the same stride Stride.
unsigned getBitWidth() const
Return the number of bits in the APInt.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, PredicatedScalarEvolution &PSE)
Insert a pointer and calculate the start and end SCEVs.
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
#define INITIALIZE_PASS_DEPENDENCY(depName)
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Type * getPointerElementType() const
const Loop * getLoop() const
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
static const unsigned MaxVectorWidth
Maximum SIMD width.
bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const SCEVPredicate * getEqualPredicate(const SCEV *LHS, const SCEV *RHS)
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
This file implements a class to represent arbitrary precision integral constant values and operations...
static const char * DepName[]
String version of the types.
const APInt & getAPInt() const
BlockT * getHeader() const
ConstantInt * getValue() const
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
int64_t getSExtValue() const
Get sign extended value.
Type * getType() const
All values are typed, get the type of this value.
member_iterator member_begin(iterator I) const
This node represents a polynomial recurrence on the trip count of the specified loop.
static cl::opt< bool > EnableForwardingConflictDetection("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))
Enable store-to-load forwarding conflict detection.
iterator insert(const ElemTy &Data)
insert - Insert a new value into the union/find set, ignoring the request if the value already exists...
const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
This header provides classes for managing per-loop analyses.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
iterator findValue(const ElemTy &V) const
findValue - Return an iterator to the specified value.
Value * getLoadStorePointerOperand(Value *V)
A helper function that returns the pointer operand of a load or store instruction.
An instruction for storing to memory.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
static Instruction * getFirstInst(Instruction *FirstInst, Value *V, Instruction *Loc)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
iterator find(const_arg_type_t< KeyT > Val)
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
std::pair< Instruction *, Instruction * > addRuntimeChecks(Instruction *Loc) const
Add code that checks at runtime if the accessed arrays overlap.
bool isForward() const
Lexically forward dependence.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
bool isNegative() const
Determine sign of this APInt.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
initializer< Ty > init(const Ty &Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
bool needsChecking(const CheckingPtrGroup &M, const CheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
LLVM Basic Block Representation.
The instances of the Type class are immutable: once they are created, they are never changed...
This is an important class for using LLVM in a threaded context.
static cl::opt< unsigned > MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100))
We collect dependences up to this threshold.
Value handle that tracks a Value across RAUW.
size_t size() const
size - Get the array size.
This analysis provides dependence information for the memory accesses of a loop.
void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
static bool hasComputableBounds(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L, bool Assume)
Check whether a pointer can participate in a runtime bounds check.
bool addPointer(unsigned Index)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
member_iterator member_end() const
Represent the analysis usage information of a pass.
VectorizationSafetyStatus
Type to keep track of the status of the dependence check.
unsigned getAddressSpace() const
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr=nullptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one...
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
const SCEV * getStart() const
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
std::pair< const CheckingPtrGroup *, const CheckingPtrGroup * > PointerCheck
A memcheck which made up of a pair of grouped pointers.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Value * stripIntegerCast(Value *V)
const SCEV * getMulExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
DepType
The type of the dependence.
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
const Value * Ptr
The address of the start of the location.
Representation for a specific memory location.
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, const ValueToValueMap &Strides)
Check whether the dependencies between the accesses are safe.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const X86InstrFMA3Group Groups[]
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Returns true if we've proved that V doesn't wrap by means of a SCEV predicate.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, PredicatedScalarEvolution &PSE, const Loop *L)
Return true if an AddRec pointer Ptr is unsigned non-wrapping, i.e.
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
Type * getType() const
Return the LLVM type of this SCEV expression.
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass...
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
bool isFunctionVectorizable(StringRef F, unsigned VF) const
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
static const SCEV * getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE)
Compare I and J and return the minimum.
Provides information about what library functions are available for the current target.
void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction *> &Instrs) const
Print the dependence.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Drive the analysis of memory accesses in the loop.
const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
static ConstantInt * getTrue(LLVMContext &Context)
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const LoopAccessInfo & getInfo(Loop *L)
Query the result of the loop access information for the loop L.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
Accumulate offsets from stripInBoundsConstantOffsets().
Class for arbitrary precision integers.
void generateChecks(MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies)
Generate the checks and store it.
void setPreservesAll()
Set by analyses that do not transform their input at all.
This class uses information about analyze scalars to rewrite expressions in canonical form...
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
static PointerBounds expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE, const RuntimePointerChecking &PtrRtChecking)
Expand code for the lower and upper bound of the pointer group CG in TheLoop.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Dependece between memory access instructions.
void emplace_back(ArgTypes &&... Args)
This class represents an analyzed expression in the program.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
static cl::opt< bool > EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))
This enables versioning on the strides of symbolically striding memory accesses in code like the foll...
Represents a single loop in the control flow graph.
This file provides utility analysis objects describing memory locations.
StringRef getName() const
Return a constant reference to the value's name.
const Function * getParent() const
Return the enclosing method, or null if none.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
static cl::opt< unsigned, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
static unsigned getAddressSpaceOperand(Value *I)
Take the address space operand from the Load/Store instruction.
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
static LLVM_ATTRIBUTE_ALWAYS_INLINE bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL)
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
iterator_range< df_iterator< T > > depth_first(const T &G)
static VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
typename std::set< ECValue >::const_iterator iterator
iterator* - Provides a way to iterate over all values in the set.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isAnnotatedParallel() const
Returns true if the loop is annotated parallel.
const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS)
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM Value Representation.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
A vector that has set insertion semantics.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
This class implements an extremely fast bulk output stream that can only output to a stream...
The legacy pass manager's analysis pass to compute loop information.
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
StringRef - Represent a constant reference to a string, i.e.
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
This header defines various interfaces for pass management in LLVM.
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
OutputIt copy(R &&Range, OutputIt Out)
iterator_range< block_iterator > blocks() const
bool sortPtrAccesses(ArrayRef< Value *> VL, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
Attempt to sort the pointers in VL and return the sorted indices in SortedIndices, if reordering is required.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
A special type used by analysis passes to provide an address that identifies that particular analysis...
LocationClass< Ty > location(Ty &L)
PointerType * getType() const
Global values are always pointers.
static cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))
void print(raw_ostream &OS, const Module *M=nullptr) const override
Print the result of the analysis when invoked with -analyze.
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object...
static bool isNoWrap(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L)
Check whether a pointer address cannot wrap.
const BasicBlock * getParent() const
This class represents a constant integer value.
bool Need
This flag indicates if we need to add the runtime check.
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI)
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.