LLVM  8.0.1
VPlanSLP.cpp
Go to the documentation of this file.
1 //===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 /// This file implements SLP analysis based on VPlan. The analysis is based on
10 /// the ideas described in
11 ///
12 /// Look-ahead SLP: auto-vectorization in the presence of commutative
13 /// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
14 /// Luís F. W. Góes
15 ///
16 //===----------------------------------------------------------------------===//
17 
18 #include "VPlan.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/IR/BasicBlock.h"
26 #include "llvm/IR/CFG.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/InstrTypes.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Type.h"
32 #include "llvm/IR/Value.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/Debug.h"
39 #include <cassert>
40 #include <iterator>
41 #include <string>
42 #include <vector>
43 
44 using namespace llvm;
45 
46 #define DEBUG_TYPE "vplan-slp"
47 
48 // Number of levels to look ahead when re-ordering multi node operands.
49 static unsigned LookaheadMaxDepth = 5;
50 
51 VPInstruction *VPlanSlp::markFailed() {
52  // FIXME: Currently this is used to signal we hit instructions we cannot
53  // trivially SLP'ize.
54  CompletelySLP = false;
55  return nullptr;
56 }
57 
58 void VPlanSlp::addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New) {
59  if (all_of(Operands, [](VPValue *V) {
60  return cast<VPInstruction>(V)->getUnderlyingInstr();
61  })) {
62  unsigned BundleSize = 0;
63  for (VPValue *V : Operands) {
64  Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
65  assert(!T->isVectorTy() && "Only scalar types supported for now");
66  BundleSize += T->getScalarSizeInBits();
67  }
68  WidestBundleBits = std::max(WidestBundleBits, BundleSize);
69  }
70 
71  auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
72  assert(Res.second &&
73  "Already created a combined instruction for the operand bundle");
74  (void)Res;
75 }
76 
77 bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const {
78  // Currently we only support VPInstructions.
79  if (!all_of(Operands, [](VPValue *Op) {
80  return Op && isa<VPInstruction>(Op) &&
81  cast<VPInstruction>(Op)->getUnderlyingInstr();
82  })) {
83  LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");
84  return false;
85  }
86 
87  // Check if opcodes and type width agree for all instructions in the bundle.
88  // FIXME: Differing widths/opcodes can be handled by inserting additional
89  // instructions.
90  // FIXME: Deal with non-primitive types.
91  const Instruction *OriginalInstr =
92  cast<VPInstruction>(Operands[0])->getUnderlyingInstr();
93  unsigned Opcode = OriginalInstr->getOpcode();
94  unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits();
95  if (!all_of(Operands, [Opcode, Width](VPValue *Op) {
96  const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr();
97  return I->getOpcode() == Opcode &&
98  I->getType()->getPrimitiveSizeInBits() == Width;
99  })) {
100  LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n");
101  return false;
102  }
103 
104  // For now, all operands must be defined in the same BB.
105  if (any_of(Operands, [this](VPValue *Op) {
106  return cast<VPInstruction>(Op)->getParent() != &this->BB;
107  })) {
108  LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n");
109  return false;
110  }
111 
112  if (any_of(Operands,
113  [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {
114  LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");
115  return false;
116  }
117 
118  // For loads, check that there are no instructions writing to memory in
119  // between them.
120  // TODO: we only have to forbid instructions writing to memory that could
121  // interfere with any of the loads in the bundle
122  if (Opcode == Instruction::Load) {
123  unsigned LoadsSeen = 0;
124  VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent();
125  for (auto &I : *Parent) {
126  auto *VPI = cast<VPInstruction>(&I);
127  if (VPI->getOpcode() == Instruction::Load &&
128  std::find(Operands.begin(), Operands.end(), VPI) != Operands.end())
129  LoadsSeen++;
130 
131  if (LoadsSeen == Operands.size())
132  break;
133  if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
134  LLVM_DEBUG(
135  dbgs() << "VPSLP: instruction modifying memory between loads\n");
136  return false;
137  }
138  }
139 
140  if (!all_of(Operands, [](VPValue *Op) {
141  return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
142  ->isSimple();
143  })) {
144  LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");
145  return false;
146  }
147  }
148 
149  if (Opcode == Instruction::Store)
150  if (!all_of(Operands, [](VPValue *Op) {
151  return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
152  ->isSimple();
153  })) {
154  LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");
155  return false;
156  }
157 
158  return true;
159 }
160 
162  unsigned OperandIndex) {
163  SmallVector<VPValue *, 4> Operands;
164  for (VPValue *V : Values) {
165  auto *U = cast<VPUser>(V);
166  Operands.push_back(U->getOperand(OperandIndex));
167  }
168  return Operands;
169 }
170 
171 static bool areCommutative(ArrayRef<VPValue *> Values) {
173  cast<VPInstruction>(Values[0])->getOpcode());
174 }
175 
179  auto *VPI = cast<VPInstruction>(Values[0]);
180 
181  switch (VPI->getOpcode()) {
182  case Instruction::Load:
183  llvm_unreachable("Loads terminate a tree, no need to get operands");
184  case Instruction::Store:
185  Result.push_back(getOperands(Values, 0));
186  break;
187  default:
188  for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)
189  Result.push_back(getOperands(Values, I));
190  break;
191  }
192 
193  return Result;
194 }
195 
196 /// Returns the opcode of Values or ~0 if they do not all agree.
198  unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();
199  if (any_of(Values, [Opcode](VPValue *V) {
200  return cast<VPInstruction>(V)->getOpcode() != Opcode;
201  }))
202  return None;
203  return {Opcode};
204 }
205 
206 /// Returns true if A and B access sequential memory if they are loads or
207 /// stores or if they have identical opcodes otherwise.
210  if (A->getOpcode() != B->getOpcode())
211  return false;
212 
213  if (A->getOpcode() != Instruction::Load &&
215  return true;
216  auto *GA = IAI.getInterleaveGroup(A);
217  auto *GB = IAI.getInterleaveGroup(B);
218 
219  return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
220 }
221 
222 /// Implements getLAScore from Listing 7 in the paper.
223 /// Traverses and compares operands of V1 and V2 to MaxLevel.
224 static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel,
226  if (!isa<VPInstruction>(V1) || !isa<VPInstruction>(V2))
227  return 0;
228 
229  if (MaxLevel == 0)
230  return (unsigned)areConsecutiveOrMatch(cast<VPInstruction>(V1),
231  cast<VPInstruction>(V2), IAI);
232 
233  unsigned Score = 0;
234  for (unsigned I = 0, EV1 = cast<VPUser>(V1)->getNumOperands(); I < EV1; ++I)
235  for (unsigned J = 0, EV2 = cast<VPUser>(V2)->getNumOperands(); J < EV2; ++J)
236  Score += getLAScore(cast<VPUser>(V1)->getOperand(I),
237  cast<VPUser>(V2)->getOperand(J), MaxLevel - 1, IAI);
238  return Score;
239 }
240 
241 std::pair<VPlanSlp::OpMode, VPValue *>
242 VPlanSlp::getBest(OpMode Mode, VPValue *Last,
243  SmallPtrSetImpl<VPValue *> &Candidates,
245  assert((Mode == OpMode::Load || Mode == OpMode::Opcode) &&
246  "Currently we only handle load and commutative opcodes");
247  LLVM_DEBUG(dbgs() << " getBest\n");
248 
249  SmallVector<VPValue *, 4> BestCandidates;
250  LLVM_DEBUG(dbgs() << " Candidates for "
251  << *cast<VPInstruction>(Last)->getUnderlyingInstr() << " ");
252  for (auto *Candidate : Candidates) {
253  auto *LastI = cast<VPInstruction>(Last);
254  auto *CandidateI = cast<VPInstruction>(Candidate);
255  if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) {
256  LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
257  << " ");
258  BestCandidates.push_back(Candidate);
259  }
260  }
261  LLVM_DEBUG(dbgs() << "\n");
262 
263  if (BestCandidates.empty())
264  return {OpMode::Failed, nullptr};
265 
266  if (BestCandidates.size() == 1)
267  return {Mode, BestCandidates[0]};
268 
269  VPValue *Best = nullptr;
270  unsigned BestScore = 0;
271  for (unsigned Depth = 1; Depth < LookaheadMaxDepth; Depth++) {
272  unsigned PrevScore = ~0u;
273  bool AllSame = true;
274 
275  // FIXME: Avoid visiting the same operands multiple times.
276  for (auto *Candidate : BestCandidates) {
277  unsigned Score = getLAScore(Last, Candidate, Depth, IAI);
278  if (PrevScore == ~0u)
279  PrevScore = Score;
280  if (PrevScore != Score)
281  AllSame = false;
282  PrevScore = Score;
283 
284  if (Score > BestScore) {
285  BestScore = Score;
286  Best = Candidate;
287  }
288  }
289  if (!AllSame)
290  break;
291  }
292  LLVM_DEBUG(dbgs() << "Found best "
293  << *cast<VPInstruction>(Best)->getUnderlyingInstr()
294  << "\n");
295  Candidates.erase(Best);
296 
297  return {Mode, Best};
298 }
299 
300 SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() {
303  FinalOrder.reserve(MultiNodeOps.size());
304  Mode.reserve(MultiNodeOps.size());
305 
306  LLVM_DEBUG(dbgs() << "Reordering multinode\n");
307 
308  for (auto &Operands : MultiNodeOps) {
309  FinalOrder.push_back({Operands.first, {Operands.second[0]}});
310  if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==
312  Mode.push_back(OpMode::Load);
313  else
314  Mode.push_back(OpMode::Opcode);
315  }
316 
317  for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {
318  LLVM_DEBUG(dbgs() << " Finding best value for lane " << Lane << "\n");
319  SmallPtrSet<VPValue *, 4> Candidates;
320  LLVM_DEBUG(dbgs() << " Candidates ");
321  for (auto Ops : MultiNodeOps) {
322  LLVM_DEBUG(
323  dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
324  << " ");
325  Candidates.insert(Ops.second[Lane]);
326  }
327  LLVM_DEBUG(dbgs() << "\n");
328 
329  for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {
330  LLVM_DEBUG(dbgs() << " Checking " << Op << "\n");
331  if (Mode[Op] == OpMode::Failed)
332  continue;
333 
334  VPValue *Last = FinalOrder[Op].second[Lane - 1];
335  std::pair<OpMode, VPValue *> Res =
336  getBest(Mode[Op], Last, Candidates, IAI);
337  if (Res.second)
338  FinalOrder[Op].second.push_back(Res.second);
339  else
340  // TODO: handle this case
341  FinalOrder[Op].second.push_back(markFailed());
342  }
343  }
344 
345  return FinalOrder;
346 }
347 
348 void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) {
349  dbgs() << " Ops: ";
350  for (auto Op : Values)
351  if (auto *Instr = cast_or_null<VPInstruction>(Op)->getUnderlyingInstr())
352  dbgs() << *Instr << " | ";
353  else
354  dbgs() << " nullptr | ";
355  dbgs() << "\n";
356 }
357 
359  assert(!Values.empty() && "Need some operands!");
360 
361  // If we already visited this instruction bundle, re-use the existing node
362  auto I = BundleToCombined.find(to_vector<4>(Values));
363  if (I != BundleToCombined.end()) {
364 #ifndef NDEBUG
365  // Check that the resulting graph is a tree. If we re-use a node, this means
366  // its values have multiple users. We only allow this, if all users of each
367  // value are the same instruction.
368  for (auto *V : Values) {
369  auto UI = V->user_begin();
370  auto *FirstUser = *UI++;
371  while (UI != V->user_end()) {
372  assert(*UI == FirstUser && "Currently we only support SLP trees.");
373  UI++;
374  }
375  }
376 #endif
377  return I->second;
378  }
379 
380  // Dump inputs
381  LLVM_DEBUG({
382  dbgs() << "buildGraph: ";
383  dumpBundle(Values);
384  });
385 
386  if (!areVectorizable(Values))
387  return markFailed();
388 
389  assert(getOpcode(Values) && "Opcodes for all values must match");
390  unsigned ValuesOpcode = getOpcode(Values).getValue();
391 
392  SmallVector<VPValue *, 4> CombinedOperands;
393  if (areCommutative(Values)) {
394  bool MultiNodeRoot = !MultiNodeActive;
395  MultiNodeActive = true;
396  for (auto &Operands : getOperands(Values)) {
397  LLVM_DEBUG({
398  dbgs() << " Visiting Commutative";
399  dumpBundle(Operands);
400  });
401 
402  auto OperandsOpcode = getOpcode(Operands);
403  if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {
404  LLVM_DEBUG(dbgs() << " Same opcode, continue building\n");
405  CombinedOperands.push_back(buildGraph(Operands));
406  } else {
407  LLVM_DEBUG(dbgs() << " Adding multinode Ops\n");
408  // Create dummy VPInstruction, which will we replace later by the
409  // re-ordered operand.
410  VPInstruction *Op = new VPInstruction(0, {});
411  CombinedOperands.push_back(Op);
412  MultiNodeOps.emplace_back(Op, Operands);
413  }
414  }
415 
416  if (MultiNodeRoot) {
417  LLVM_DEBUG(dbgs() << "Reorder \n");
418  MultiNodeActive = false;
419 
420  auto FinalOrder = reorderMultiNodeOps();
421 
422  MultiNodeOps.clear();
423  for (auto &Ops : FinalOrder) {
424  VPInstruction *NewOp = buildGraph(Ops.second);
425  Ops.first->replaceAllUsesWith(NewOp);
426  for (unsigned i = 0; i < CombinedOperands.size(); i++)
427  if (CombinedOperands[i] == Ops.first)
428  CombinedOperands[i] = NewOp;
429  delete Ops.first;
430  Ops.first = NewOp;
431  }
432  LLVM_DEBUG(dbgs() << "Found final order\n");
433  }
434  } else {
435  LLVM_DEBUG(dbgs() << " NonCommuntative\n");
436  if (ValuesOpcode == Instruction::Load)
437  for (VPValue *V : Values)
438  CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0));
439  else
440  for (auto &Operands : getOperands(Values))
441  CombinedOperands.push_back(buildGraph(Operands));
442  }
443 
444  unsigned Opcode;
445  switch (ValuesOpcode) {
446  case Instruction::Load:
447  Opcode = VPInstruction::SLPLoad;
448  break;
449  case Instruction::Store:
450  Opcode = VPInstruction::SLPStore;
451  break;
452  default:
453  Opcode = ValuesOpcode;
454  break;
455  }
456 
457  if (!CompletelySLP)
458  return markFailed();
459 
460  assert(CombinedOperands.size() > 0 && "Need more some operands");
461  auto *VPI = new VPInstruction(Opcode, CombinedOperands);
462  VPI->setUnderlyingInstr(cast<VPInstruction>(Values[0])->getUnderlyingInstr());
463 
464  LLVM_DEBUG(dbgs() << "Create VPInstruction "; VPI->print(dbgs());
465  cast<VPInstruction>(Values[0])->print(dbgs()); dbgs() << "\n");
466  addCombined(Values, VPI);
467  return VPI;
468 }
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
SI Whole Quad Mode
This class represents lattice values for constants.
Definition: AllocatorList.h:24
iterator begin() const
Definition: ArrayRef.h:137
bool hasMoreThanOneUniqueUser()
Returns true if the value has more than one unique user.
Definition: VPlanValue.h:111
unsigned second
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
void reserve(size_type N)
Definition: SmallVector.h:376
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
static bool areCommutative(ArrayRef< VPValue *> Values)
Definition: VPlanSLP.cpp:171
static Optional< unsigned > getOpcode(ArrayRef< VPValue *> Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:197
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:161
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VPlan.h:1531
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations of the Vectorization Plan base classes:
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1193
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
size_t size() const
Definition: SmallVector.h:53
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1207
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:965
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
VPInstruction * buildGraph(ArrayRef< VPValue *> Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands...
Definition: VPlanSLP.cpp:358
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:148
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
iterator end() const
Definition: ArrayRef.h:138
static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel, VPInterleavedAccessInfo &IAI)
Implements getLAScore from Listing 7 in the paper.
Definition: VPlanSLP.cpp:224
bool isCommutative() const
Return true if the instruction is commutative:
Definition: Instruction.h:478
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
static unsigned LookaheadMaxDepth
Definition: VPlanSLP.cpp:49
static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B, VPInterleavedAccessInfo &IAI)
Returns true if A and B access sequential memory if they are loads or stores or if they have identica...
Definition: VPlanSLP.cpp:208
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
void emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:652
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
#define I(x, y, z)
Definition: MD5.cpp:58
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
unsigned getOpcode() const
Definition: VPlan.h:662
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
static const Function * getParent(const Value *V)
static SmallVector< VPValue *, 4 > getOperands(ArrayRef< VPValue *> Values, unsigned OperandIndex)
Definition: VPlanSLP.cpp:161
#define LLVM_DEBUG(X)
Definition: Debug.h:123
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:611
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144