92 #define DEBUG_TYPE "divergence-analysis" 98 : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA),
99 IsLCSSAForm(IsLCSSAForm) {}
102 assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal));
104 DivergentValues.
insert(&DivVal);
108 UniformOverrides.
insert(&UniVal);
111 bool DivergenceAnalysis::updateTerminator(
const Instruction &Term)
const {
114 if (
auto *BranchTerm = dyn_cast<BranchInst>(&Term)) {
115 assert(BranchTerm->isConditional());
118 if (
auto *SwitchTerm = dyn_cast<SwitchInst>(&Term)) {
121 if (isa<InvokeInst>(Term)) {
128 bool DivergenceAnalysis::updateNormalInstruction(
const Instruction &
I)
const {
137 bool DivergenceAnalysis::isTemporalDivergent(
const BasicBlock &ObservingBlock,
138 const Value &Val)
const {
147 if (DivergentLoops.find(
Loop) != DivergentLoops.end())
154 bool DivergenceAnalysis::updatePHINode(
const PHINode &Phi)
const {
173 isTemporalDivergent(*Phi.
getParent(), *InVal)) {
190 void DivergenceAnalysis::taintLoopLiveOuts(
const BasicBlock &LoopHeader) {
192 assert(DivLoop &&
"loopHeader is not actually part of a loop");
195 DivLoop->getExitBlocks(TaintStack);
200 for (
auto *Block : TaintStack) {
203 Visited.
insert(&LoopHeader);
205 while (!TaintStack.empty()) {
206 auto *UserBlock = TaintStack.back();
207 TaintStack.pop_back();
213 assert(!DivLoop->contains(UserBlock) &&
214 "irreducible control flow detected");
217 if (!DT.
dominates(&LoopHeader, UserBlock)) {
219 for (
auto &Phi : UserBlock->phis()) {
220 Worklist.push_back(&Phi);
226 for (
auto &I : *UserBlock) {
236 if (DivLoop->contains(OpInst->getParent())) {
245 for (
auto *SuccBlock :
successors(UserBlock)) {
246 if (!Visited.
insert(SuccBlock).second) {
249 TaintStack.push_back(SuccBlock);
254 void DivergenceAnalysis::pushPHINodes(
const BasicBlock &Block) {
255 for (
const auto &Phi : Block.
phis()) {
258 Worklist.push_back(&Phi);
262 void DivergenceAnalysis::pushUsers(
const Value &V) {
274 Worklist.push_back(UserInst);
278 bool DivergenceAnalysis::propagateJoinDivergence(
const BasicBlock &JoinBlock,
279 const Loop *BranchLoop) {
288 pushPHINodes(JoinBlock);
291 if (BranchLoop && !BranchLoop->
contains(&JoinBlock)) {
296 markBlockJoinDivergent(JoinBlock);
300 void DivergenceAnalysis::propagateBranchDivergence(
const Instruction &Term) {
308 bool IsBranchLoopDivergent =
false;
312 for (
const auto *JoinBlock : SDA.
join_blocks(Term)) {
313 IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
317 if (IsBranchLoopDivergent) {
319 if (!DivergentLoops.insert(BranchLoop).second) {
322 propagateLoopDivergence(*BranchLoop);
326 void DivergenceAnalysis::propagateLoopDivergence(
const Loop &ExitingLoop) {
342 taintLoopLiveOuts(*ExitingLoop.
getHeader());
345 bool IsBranchLoopDivergent =
false;
350 for (
const auto *JoinBlock : SDA.
join_blocks(ExitingLoop)) {
351 IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
355 if (IsBranchLoopDivergent) {
357 if (!DivergentLoops.insert(BranchLoop).second) {
360 propagateLoopDivergence(*BranchLoop);
365 for (
auto *DivVal : DivergentValues) {
370 while (!Worklist.empty()) {
384 if (updateTerminator(I)) {
386 propagateBranchDivergence(I);
392 bool DivergentUpd =
false;
395 DivergentUpd = updatePHINode(*Phi);
397 DivergentUpd = updateNormalInstruction(I);
409 return UniformOverrides.
find(&V) != UniformOverrides.
end();
413 return DivergentValues.
find(&V) != DivergentValues.
end();
417 if (DivergentValues.
empty())
422 OS <<
"DIVERGENT:" << I <<
'\n';
432 : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA,
false) {
This class represents lattice values for constants.
A Module instance is used to store all the information related to an LLVM module. ...
void addUniformOverride(const Value &UniVal)
Mark UniVal as a value that is always uniform.
Implements a dense probed hash-table based set.
bool isTerminator() const
const ConstBlockSet & join_blocks(const Instruction &Term)
Computes divergent join points and loop exits caused by branch divergence in Term.
DivergenceAnalysis(const Function &F, const Loop *RegionLoop, const DominatorTree &DT, const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm)
This instance will analyze the whole function F or the loop RegionLoop.
void compute()
Propagate divergence to all instructions in the region.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool isDivergent(const Value &V) const
Whether V is divergent.
const Function & getFunction() const
BlockT * getHeader() const
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
LLVM Basic Block Representation.
std::pair< iterator, bool > insert(const ValueT &V)
Relates points of divergent control to join points in reducible CFGs.
GPUDivergenceAnalysis(Function &F, const DominatorTree &DT, const PostDominatorTree &PDT, const LoopInfo &LI, const TargetTransformInfo &TTI)
Runs the divergence analysis on , a GPU kernel.
bool hasConstantOrUndefValue() const
Whether the specified PHI node always merges together the same value, assuming undefs are equal to a ...
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
iterator_range< user_iterator > users()
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
amdgpu Simplify well known AMD library false Value Value * Arg
LoopT * getParentLoop() const
StringRef getName() const
Represents a single loop in the control flow graph.
StringRef getName() const
Return a constant reference to the value's name.
const Function * getParent() const
Return the enclosing method, or null if none.
bool inRegion(const BasicBlock &BB) const
Whether BB is part of the region.
bool isDivergent(const Value &Val) const
Whether Val is a divergent value.
iterator find(const_arg_type_t< ValueT > V)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
bool isAlwaysUniform(const Value &Val) const
Whether Val will always return a uniform value regardless of its operands.
void markDivergent(const Value &DivVal)
Mark DivVal as a value that is always divergent.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
succ_range successors(Instruction *I)
This class implements an extremely fast bulk output stream that can only output to a stream...
inst_range instructions(Function *F)
void print(raw_ostream &OS, const Module *) const
void print(raw_ostream &OS, const Module *) const
Print all divergent values in the kernel.
iterator_range< arg_iterator > args()
const BasicBlock * getParent() const