84 #define DEBUG_TYPE "divergence" 89 cl::desc(
"turn the LegacyDivergenceAnalysis into " 90 "a wrapper for GPUDivergenceAnalysis"));
98 : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
99 void populateWithSourcesOfDivergence();
104 void exploreDataDependency(
Value *V);
113 void findUsersOutsideInfluenceRegion(
120 std::vector<Value *> Worklist;
124 void DivergencePropagator::populateWithSourcesOfDivergence() {
128 if (TTI.isSourceOfDivergence(&
I)) {
129 Worklist.push_back(&
I);
133 for (
auto &
Arg :
F.args()) {
134 if (TTI.isSourceOfDivergence(&
Arg)) {
135 Worklist.push_back(&
Arg);
141 void DivergencePropagator::exploreSyncDependency(
Instruction *TI) {
164 if (IPostDom ==
nullptr)
167 for (
auto I = IPostDom->
begin(); isa<PHINode>(
I); ++
I) {
170 if (!cast<PHINode>(
I)->hasConstantOrUndefValue() && DV.insert(&*
I).second)
171 Worklist.push_back(&*
I);
194 computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
200 while (InfluenceRegion.
count(InfluencedBB)) {
201 for (
auto &
I : *InfluencedBB)
202 findUsersOutsideInfluenceRegion(
I, InfluenceRegion);
204 if (IDomNode ==
nullptr)
206 InfluencedBB = IDomNode->
getBlock();
210 void DivergencePropagator::findUsersOutsideInfluenceRegion(
215 if (DV.insert(UserInst).second)
216 Worklist.push_back(UserInst);
226 std::vector<BasicBlock *> &InfluenceStack) {
228 if (Succ != End && InfluenceRegion.
insert(Succ).second)
229 InfluenceStack.push_back(Succ);
233 void DivergencePropagator::computeInfluenceRegion(
237 "End does not properly dominate Start");
242 std::vector<BasicBlock *> InfluenceStack;
243 addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
244 while (!InfluenceStack.empty()) {
246 InfluenceStack.pop_back();
247 addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack);
251 void DivergencePropagator::exploreDataDependency(
Value *V) {
255 if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second)
256 Worklist.push_back(UserInst);
262 while (!Worklist.empty()) {
263 Value *V = Worklist.back();
269 exploreSyncDependency(I);
271 exploreDataDependency(V);
280 "Legacy Divergence Analysis",
false,
true)
288 return new LegacyDivergenceAnalysis();
299 bool LegacyDivergenceAnalysis::shouldUseGPUDivergenceAnalysis(
305 auto &
LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
313 auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
314 if (TTIWP ==
nullptr)
323 DivergentValues.clear();
326 auto &
DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
327 auto &
PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
329 if (shouldUseGPUDivergenceAnalysis(F)) {
331 auto &
LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
332 gpuDA = llvm::make_unique<GPUDivergenceAnalysis>(
F,
DT,
PDT,
LI, TTI);
337 DP.populateWithSourcesOfDivergence();
350 return gpuDA->isDivergent(*V);
352 return DivergentValues.count(V);
356 if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
360 if (!DivergentValues.empty()) {
361 const Value *FirstDivergentValue = *DivergentValues.begin();
362 if (
const Argument *
Arg = dyn_cast<Argument>(FirstDivergentValue)) {
363 F =
Arg->getParent();
365 dyn_cast<Instruction>(FirstDivergentValue)) {
378 OS << (isDivergent(&
Arg) ?
"DIVERGENT: " :
" ");
382 for (
auto BI = F->
begin(), BE = F->
end(); BI != BE; ++BI) {
384 OS <<
"\n " << BB.
getName() <<
":\n";
386 OS << (isDivergent(&I) ?
"DIVERGENT: " :
" ");
const Function & getFunction() const
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
void print(raw_ostream &OS, const Module *) const override
print - Print out the internal state of the pass.
bool isDivergent(const Value *V) const
This class represents an incoming formal argument to a Function.
This class represents lattice values for constants.
A Module instance is used to store all the information related to an LLVM module. ...
static cl::opt< bool > UseGPUDA("use-gpu-divergence-analysis", cl::init(false), cl::Hidden, cl::desc("turn the LegacyDivergenceAnalysis into " "a wrapper for GPUDivergenceAnalysis"))
bool isTerminator() const
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
const PostDominatorTree & PDT
block Block Frequency true
INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence", "Legacy Divergence Analysis", false, true) INITIALIZE_PASS_END(LegacyDivergenceAnalysis
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
iterator begin()
Instruction iterator methods.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass...
bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI)
Return true if the control flow in RPOTraversal is irreducible.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
const FunctionRPOT & FuncRPOT
initializer< Ty > init(const Ty &Val)
LLVM Basic Block Representation.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug() const
Return a const iterator range over the instructions in the block, skipping any debug instructions...
DomTreeNodeBase * getIDom() const
std::pair< iterator, bool > insert(const ValueT &V)
DivergencePropagator(const FunctionRPOT &FuncRPOT, const DominatorTree &DT, const PostDominatorTree &PDT, const LoopInfo &LI)
Represent the analysis usage information of a pass.
const Instruction & back() const
FunctionPass class - This class is used to implement most global optimizations.
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Legacy Divergence Analysis
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void setPreservesAll()
Set by analyses that do not transform their input at all.
iterator_range< user_iterator > users()
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
static void propagate(InstantiatedValue From, InstantiatedValue To, MatchState State, ReachabilitySet &ReachSet, std::vector< WorkListItem > &WorkList)
amdgpu Simplify well known AMD library false Value Value * Arg
FunctionPass * createLegacyDivergenceAnalysisPass()
StringRef getName() const
Return a constant reference to the value's name.
const Function * getParent() const
Return the enclosing method, or null if none.
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Module * getParent()
Get the module that this global value is contained inside of...
LLVM Value Representation.
succ_range successors(Instruction *I)
This class implements an extremely fast bulk output stream that can only output to a stream...
The legacy pass manager's analysis pass to compute loop information.
inst_range instructions(Function *F)
Legacy analysis pass which computes a DominatorTree.
iterator_range< arg_iterator > args()
const BasicBlock * getParent() const