46 #define DEBUG_TYPE "vplan-slp" 54 CompletelySLP =
false;
60 return cast<VPInstruction>(V)->getUnderlyingInstr();
62 unsigned BundleSize = 0;
64 Type *
T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
68 WidestBundleBits =
std::max(WidestBundleBits, BundleSize);
71 auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
73 "Already created a combined instruction for the operand bundle");
80 return Op && isa<VPInstruction>(
Op) &&
81 cast<VPInstruction>(Op)->getUnderlyingInstr();
83 LLVM_DEBUG(
dbgs() <<
"VPSLP: not all operands are VPInstructions\n");
92 cast<VPInstruction>(Operands[0])->getUnderlyingInstr();
93 unsigned Opcode = OriginalInstr->
getOpcode();
96 const Instruction *
I = cast<VPInstruction>(
Op)->getUnderlyingInstr();
106 return cast<VPInstruction>(
Op)->
getParent() != &this->BB;
114 LLVM_DEBUG(
dbgs() <<
"VPSLP: Some operands have multiple users.\n");
123 unsigned LoadsSeen = 0;
125 for (
auto &
I : *Parent) {
126 auto *VPI = cast<VPInstruction>(&
I);
131 if (LoadsSeen == Operands.
size())
133 if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
135 dbgs() <<
"VPSLP: instruction modifying memory between loads\n");
141 return cast<LoadInst>(cast<VPInstruction>(
Op)->getUnderlyingInstr())
144 LLVM_DEBUG(
dbgs() <<
"VPSLP: only simple loads are supported.\n");
151 return cast<StoreInst>(cast<VPInstruction>(
Op)->getUnderlyingInstr())
154 LLVM_DEBUG(
dbgs() <<
"VPSLP: only simple stores are supported.\n");
162 unsigned OperandIndex) {
165 auto *U = cast<VPUser>(V);
166 Operands.
push_back(U->getOperand(OperandIndex));
173 cast<VPInstruction>(Values[0])->
getOpcode());
179 auto *VPI = cast<VPInstruction>(Values[0]);
181 switch (VPI->getOpcode()) {
188 for (
unsigned I = 0, NumOps = VPI->getNumOperands();
I < NumOps; ++
I)
198 unsigned Opcode = cast<VPInstruction>(Values[0])->
getOpcode();
200 return cast<VPInstruction>(V)->
getOpcode() != Opcode;
219 return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
226 if (!isa<VPInstruction>(V1) || !isa<VPInstruction>(V2))
231 cast<VPInstruction>(V2), IAI);
234 for (
unsigned I = 0, EV1 = cast<VPUser>(V1)->getNumOperands();
I < EV1; ++
I)
235 for (
unsigned J = 0, EV2 = cast<VPUser>(V2)->getNumOperands(); J < EV2; ++J)
236 Score +=
getLAScore(cast<VPUser>(V1)->getOperand(
I),
237 cast<VPUser>(V2)->getOperand(J), MaxLevel - 1, IAI);
241 std::pair<VPlanSlp::OpMode, VPValue *>
246 "Currently we only handle load and commutative opcodes");
251 << *cast<VPInstruction>(Last)->getUnderlyingInstr() <<
" ");
252 for (
auto *Candidate : Candidates) {
253 auto *LastI = cast<VPInstruction>(Last);
254 auto *CandidateI = cast<VPInstruction>(Candidate);
256 LLVM_DEBUG(
dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
263 if (BestCandidates.
empty())
266 if (BestCandidates.
size() == 1)
267 return {
Mode, BestCandidates[0]};
270 unsigned BestScore = 0;
272 unsigned PrevScore = ~0u;
276 for (
auto *Candidate : BestCandidates) {
278 if (PrevScore == ~0u)
280 if (PrevScore != Score)
284 if (Score > BestScore) {
293 << *cast<VPInstruction>(Best)->getUnderlyingInstr()
295 Candidates.erase(Best);
308 for (
auto &Operands : MultiNodeOps) {
309 FinalOrder.
push_back({Operands.first, {Operands.second[0]}});
310 if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==
317 for (
unsigned Lane = 1,
E = MultiNodeOps[0].
second.size(); Lane <
E; ++Lane) {
318 LLVM_DEBUG(
dbgs() <<
" Finding best value for lane " << Lane <<
"\n");
321 for (
auto Ops : MultiNodeOps) {
323 dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
325 Candidates.
insert(Ops.second[Lane]);
329 for (
unsigned Op = 0, E = MultiNodeOps.size();
Op <
E; ++
Op) {
334 VPValue *Last = FinalOrder[
Op].second[Lane - 1];
335 std::pair<OpMode, VPValue *> Res =
336 getBest(Mode[
Op], Last, Candidates, IAI);
350 for (
auto Op : Values)
351 if (
auto *Instr = cast_or_null<VPInstruction>(
Op)->getUnderlyingInstr())
352 dbgs() << *Instr <<
" | ";
354 dbgs() <<
" nullptr | ";
362 auto I = BundleToCombined.find(to_vector<4>(Values));
363 if (I != BundleToCombined.end()) {
368 for (
auto *V : Values) {
369 auto UI = V->user_begin();
370 auto *FirstUser = *UI++;
371 while (UI != V->user_end()) {
372 assert(*UI == FirstUser &&
"Currently we only support SLP trees.");
382 dbgs() <<
"buildGraph: ";
386 if (!areVectorizable(Values))
394 bool MultiNodeRoot = !MultiNodeActive;
395 MultiNodeActive =
true;
398 dbgs() <<
" Visiting Commutative";
399 dumpBundle(Operands);
402 auto OperandsOpcode =
getOpcode(Operands);
403 if (OperandsOpcode && OperandsOpcode ==
getOpcode(Values)) {
418 MultiNodeActive =
false;
420 auto FinalOrder = reorderMultiNodeOps();
422 MultiNodeOps.
clear();
423 for (
auto &Ops : FinalOrder) {
425 Ops.first->replaceAllUsesWith(NewOp);
426 for (
unsigned i = 0; i < CombinedOperands.
size(); i++)
427 if (CombinedOperands[i] == Ops.first)
428 CombinedOperands[i] = NewOp;
438 CombinedOperands.
push_back(cast<VPInstruction>(V)->getOperand(0));
445 switch (ValuesOpcode) {
453 Opcode = ValuesOpcode;
460 assert(CombinedOperands.
size() > 0 &&
"Need more some operands");
462 VPI->setUnderlyingInstr(cast<VPInstruction>(Values[0])->getUnderlyingInstr());
465 cast<VPInstruction>(Values[0])->
print(
dbgs());
dbgs() <<
"\n");
466 addCombined(Values, VPI);
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
This class represents lattice values for constants.
void push_back(const T &Elt)
bool hasMoreThanOneUniqueUser()
Returns true if the value has more than one unique user.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
bool isVectorTy() const
True if this is an instance of VectorType.
void reserve(size_type N)
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
static bool areCommutative(ArrayRef< VPValue *> Values)
static Optional< unsigned > getOpcode(ArrayRef< VPValue *> Values)
Returns the opcode of Values or ~0 if they do not all agree.
Type * getType() const
All values are typed, get the type of this value.
const T & getValue() const LLVM_LVALUE_FUNCTION
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations of the Vectorization Plan base classes:
The instances of the Type class are immutable: once they are created, they are never changed...
size_t size() const
size - Get the array size.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
VPInstruction * buildGraph(ArrayRef< VPValue *> Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands...
testing::Matcher< const detail::ErrorHolder & > Failed()
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel, VPInterleavedAccessInfo &IAI)
Implements getLAScore from Listing 7 in the paper.
bool isCommutative() const
Return true if the instruction is commutative:
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static unsigned LookaheadMaxDepth
static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B, VPInterleavedAccessInfo &IAI)
Returns true if A and B access sequential memory if they are loads or stores or if they have identica...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
void emplace_back(ArgTypes &&... Args)
LLVM_NODISCARD bool empty() const
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
unsigned getOpcode() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static const Function * getParent(const Value *V)
static SmallVector< VPValue *, 4 > getOperands(ArrayRef< VPValue *> Values, unsigned OperandIndex)
This is a concrete Recipe that models a single VPlan-level instruction.
bool empty() const
empty - Check if the array is empty.