27 #define DEBUG_TYPE "hexagon-isel" 104 using MapType = std::map<Node, ColorKind>;
105 static constexpr Node Ignore = Node(-1);
113 const MapType &colors()
const {
119 return ColorKind::Red;
120 return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
128 std::set<Node> Needed;
130 using NodeSet = std::set<Node>;
131 std::map<Node,NodeSet> Edges;
133 Node conj(Node Pos) {
134 Node Num = Order.
size();
135 return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2;
139 auto F = Colors.find(N);
143 std::pair<bool, ColorKind> getUniqueColor(
const NodeSet &Nodes);
150 std::pair<bool, ColorKind> Coloring::getUniqueColor(
const NodeSet &Nodes) {
152 for (Node
N : Nodes) {
161 return {
true,
Color };
166 for (
unsigned P = 0;
P != Order.size(); ++
P) {
170 Node PC = Order[conj(
P)];
171 if (PC != Ignore && PC != I)
176 for (
unsigned I = 0;
I != Order.size(); ++
I) {
177 if (!Needed.count(
I))
189 bool Coloring::color() {
191 auto Enqueue = [
this,&FirstQ] (Node
N) {
194 for (
unsigned I = 0;
I != Q.
size(); ++
I) {
200 for (Node
N : Needed)
203 for (Node
N : FirstQ) {
207 auto P = getUniqueColor(Ns);
210 Colors[
N] = other(
P.second);
214 for (
auto E : Edges) {
216 if (!Needed.count(conj(N)) || Colors.count(N))
218 auto P = getUniqueColor(
E.second);
221 Colors[
N] = other(
P.second);
226 std::vector<Node> WorkQ;
227 for (
auto E : Edges) {
229 if (!Colors.count(N))
233 for (
unsigned I = 0;
I < WorkQ.size(); ++
I) {
236 auto P = getUniqueColor(Ns);
238 Colors[
N] = other(
P.second);
248 for (Node M : CopyNs) {
250 if (ColorM == ColorC) {
263 for (
unsigned I = 0;
I != Order.size(); ++
I)
264 if (Colors.count(
I) == 0)
270 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 272 dbgs() <<
"{ Order: {";
273 for (
unsigned I = 0;
I != Order.size(); ++
I) {
281 dbgs() <<
" Needed: {";
282 for (Node
N : Needed)
286 dbgs() <<
" Edges: {\n";
287 for (
auto E : Edges) {
288 dbgs() <<
" " <<
E.first <<
" -> {";
289 for (
auto N :
E.second)
301 case ColorKind::Black:
307 dbgs() <<
" Colors: {\n";
308 for (
auto C : Colors)
309 dbgs() <<
" " <<
C.first <<
" -> " << ColorKindToName(
C.second) <<
"\n";
319 using Controls = std::vector<uint8_t>;
320 using ElemType = int;
321 static constexpr ElemType Ignore = ElemType(-1);
337 unsigned S = Order.size();
341 Table.resize(Order.size());
342 for (RowType &Row : Table)
346 void getControls(Controls &V,
unsigned StartAt, uint8_t Dir)
const {
347 unsigned Size = Order.size();
349 for (
unsigned I = 0;
I !=
Size; ++
I) {
351 for (
unsigned L = 0; L != Log; ++L) {
352 unsigned C = ctl(
I, StartAt+L) == Switch;
363 uint8_t ctl(ElemType Pos,
unsigned Step)
const {
364 return Table[Pos][Step];
366 unsigned size()
const {
369 unsigned steps()
const {
375 std::vector<ElemType> Order;
376 using RowType = std::vector<uint8_t>;
377 std::vector<RowType> Table;
380 struct ForwardDeltaNetwork :
public PermNetwork {
383 bool run(Controls &V) {
384 if (!route(Order.data(), Table.data(),
size(), 0))
386 getControls(V, 0, Forward);
391 bool route(ElemType *
P, RowType *
T,
unsigned Size,
unsigned Step);
394 struct ReverseDeltaNetwork :
public PermNetwork {
397 bool run(Controls &V) {
398 if (!route(Order.data(), Table.data(),
size(), 0))
400 getControls(V, 0, Reverse);
405 bool route(ElemType *
P, RowType *
T,
unsigned Size,
unsigned Step);
408 struct BenesNetwork :
public PermNetwork {
411 bool run(Controls &
F, Controls &R) {
412 if (!route(Order.data(), Table.data(),
size(), 0))
415 getControls(F, 0, Forward);
416 getControls(R, Log, Reverse);
421 bool route(ElemType *
P, RowType *
T,
unsigned Size,
unsigned Step);
425 bool ForwardDeltaNetwork::route(ElemType *
P, RowType *
T,
unsigned Size,
427 bool UseUp =
false, UseDown =
false;
434 for (ElemType J = 0; J != Num; ++J) {
442 S = (J < Num/2) ?
Pass : Switch;
444 S = (J < Num/2) ? Switch :
Pass;
447 ElemType U = (S ==
Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2);
452 if (
T[U][Step] != S &&
T[U][Step] !=
None)
457 for (ElemType J = 0; J != Num; ++J)
458 if (
P[J] != Ignore &&
P[J] >= Num/2)
462 if (UseUp && !route(
P,
T, Size/2, Step+1))
464 if (UseDown && !route(
P+Size/2,
T+Size/2, Size/2, Step+1))
470 bool ReverseDeltaNetwork::route(ElemType *
P, RowType *
T,
unsigned Size,
472 unsigned Pets = Log-1 - Step;
473 bool UseUp =
false, UseDown =
false;
478 const Coloring::MapType &M =
G.colors();
483 for (ElemType J = 0; J != Num; ++J) {
495 bool InpUp = I < Num/2;
497 ColorUp = InpUp ?
C :
G.other(C);
498 if ((C == ColorUp) != InpUp) {
505 S = (J < Num/2) ?
Pass : Switch;
508 S = (J < Num/2) ? Switch :
Pass;
516 for (ElemType J = 0,
E = Size / 2; J !=
E; ++J) {
518 ElemType PC =
P[J+Size/2];
521 if (
T[J][Pets] == Switch)
523 if (
T[J+Size/2][Pets] == Switch)
529 for (ElemType J = 0; J != Num; ++J)
530 if (
P[J] != Ignore &&
P[J] >= Num/2)
534 if (UseUp && !route(
P,
T, Size/2, Step+1))
536 if (UseDown && !route(
P+Size/2,
T+Size/2, Size/2, Step+1))
542 bool BenesNetwork::route(ElemType *
P, RowType *
T,
unsigned Size,
545 const Coloring::MapType &M =
G.colors();
550 unsigned Pets = 2*Log-1 - Step;
551 bool UseUp =
false, UseDown =
false;
557 for (ElemType J = 0; J != Num; ++J) {
565 ColorUp = (I < Num / 2) ? ColorKind::Red : ColorKind::Black;
567 unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2;
572 T[CI][Step] = Switch;
573 T[J][Pets] = (J < Num/2) ?
Pass : Switch;
577 T[CI][Step] = Switch;
580 T[J][Pets] = (J < Num/2) ? Switch :
Pass;
587 for (ElemType J = 0; J != Num/2; ++J) {
589 ElemType PC =
P[J+Num/2];
592 if (
T[J][Pets] == Switch)
594 if (
T[J+Num/2][Pets] == Switch)
600 for (ElemType J = 0; J != Num; ++J)
601 if (
P[J] != Ignore &&
P[J] >= Num/2)
605 if (UseUp && !route(
P,
T, Size/2, Step+1))
607 if (UseDown && !route(
P+Size/2,
T+Size/2, Size/2, Step+1))
620 bool isValue()
const {
return OpV.getNode() !=
nullptr; }
621 bool isValid()
const {
return isValue() || !(OpN & Invalid); }
622 static OpRef res(
int N) {
return OpRef(Whole | (N &
Index)); }
623 static OpRef
fail() {
return OpRef(Invalid); }
625 static OpRef lo(
const OpRef &R) {
627 return OpRef(R.OpN & (
Undef |
Index | LoHalf));
629 static OpRef hi(
const OpRef &R) {
631 return OpRef(R.OpN & (
Undef |
Index | HiHalf));
646 Invalid = 0x10000000,
649 Whole = LoHalf | HiHalf,
659 OpRef(
unsigned N) : OpN(N) {}
662 struct NodeTemplate {
663 NodeTemplate() =
default;
666 std::vector<OpRef> Ops;
676 unsigned push(
const NodeTemplate &Res) {
678 return List.size()-1;
680 unsigned push(
unsigned Opc,
MVT Ty, std::vector<OpRef> &&Ops) {
687 bool empty()
const {
return List.empty(); }
688 unsigned size()
const {
return List.size(); }
689 unsigned top()
const {
return size()-1; }
690 const NodeTemplate &operator[](
unsigned I)
const {
return List[
I]; }
691 unsigned reset(
unsigned NewTop) {
692 List.resize(NewTop+1);
696 using BaseType = std::vector<NodeTemplate>;
697 BaseType::iterator
begin() {
return List.begin(); }
698 BaseType::iterator
end() {
return List.end(); }
699 BaseType::const_iterator
begin()
const {
return List.begin(); }
700 BaseType::const_iterator
end()
const {
return List.end(); }
709 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 712 OpV.getNode()->print(OS, &G);
723 if ((OpN & Whole) != Whole) {
724 assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf);
738 for (
const auto &R : Ops) {
748 OS <<
"Input node:\n";
752 OS <<
"Result templates:\n";
753 for (
unsigned I = 0,
E =
List.size();
I !=
E; ++
I) {
754 OS <<
'[' <<
I <<
"] ";
755 List[
I].print(OS, G);
764 for (
unsigned I = 0,
E =
Mask.size();
I !=
E; ++
I) {
768 MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M);
769 MaxSrc = (MaxSrc == -1) ? M :
std::max(MaxSrc, M);
774 int MinSrc = -1, MaxSrc = -1;
776 ShuffleMask lo()
const {
777 size_t H = Mask.
size()/2;
780 ShuffleMask hi()
const {
781 size_t H = Mask.
size()/2;
786 OS <<
"MinSrc:" << MinSrc <<
", MaxSrc:" << MaxSrc <<
" {";
831 void materialize(
const ResultStack &
Results);
839 OpRef
concat(OpRef Va, OpRef Vb, ResultStack &Results);
840 OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
842 OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
845 ResultStack &Results);
847 ResultStack &Results);
849 OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results);
850 OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
851 OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results);
852 OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
854 OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results);
855 OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
856 OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &Results);
857 OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results);
859 bool selectVectorConstants(
SDNode *N);
868 unsigned VecLen = Mask.
size();
870 for (
unsigned I = 0;
I != VecLen; ++
I) {
873 MaskL[
I] = MaskR[
I] = -1;
874 }
else if (
unsigned(M) < VecLen) {
889 for (
unsigned I = 1;
I != MaxLen; ++
I) {
894 return {
F, MaxLen };
905 for (
int I = 0,
E = Mask.
size();
I !=
E; ++
I) {
907 if (M >= 0 && M !=
I)
915 assert(Mask.
size() < 0x00007FFF &&
"Sanity failure");
917 for (
int Idx : Mask) {
923 return 2*Sum == N*(N-1);
926 bool HvxSelector::selectVectorConstants(
SDNode *
N) {
941 auto IsNodeToSelect = [] (
SDNode *
N) {
949 SDValue Addr = cast<LoadSDNode>(
N)->getBasePtr();
964 for (
unsigned i = 0; i != WorkQ.
size(); ++i) {
966 if (IsNodeToSelect(W))
975 return !Nodes.empty();
978 void HvxSelector::materialize(
const ResultStack &
Results) {
980 dbgs() <<
"Materializing\n";
985 const SDLoc &dl(Results.InpNode);
986 std::vector<SDValue> Output;
988 for (
unsigned I = 0,
E = Results.size();
I !=
E; ++
I) {
989 const NodeTemplate &Node = Results[
I];
990 std::vector<SDValue> Ops;
991 for (
const OpRef &R : Node.Ops) {
994 Ops.push_back(R.OpV);
999 Ops.push_back(
ISel.selectUndef(dl,
MVT(SVT)));
1003 unsigned Part = R.OpN & OpRef::Whole;
1007 assert(Idx >= 0 &&
unsigned(Idx) < Output.size());
1009 MVT OpTy = Op.getValueType().getSimpleVT();
1010 if (Part != OpRef::Whole) {
1011 assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf);
1014 unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo
1022 SDNode *ResN = (Node.Opc == TargetOpcode::COPY)
1023 ? Ops.front().getNode()
1025 Output.push_back(
SDValue(ResN, 0));
1028 SDNode *OutN = Output.back().getNode();
1029 SDNode *InpN = Results.InpNode;
1031 dbgs() <<
"Generated node:\n";
1036 selectVectorConstants(OutN);
1040 OpRef HvxSelector::concat(OpRef
Lo, OpRef
Hi, ResultStack &Results) {
1042 const SDLoc &dl(Results.InpNode);
1048 return OpRef::res(Results.top());
1052 OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
1056 if (!Va.isValid() || !Vb.isValid())
1059 int VecLen = SM.Mask.size();
1062 auto IsExtSubvector = [] (ShuffleMask M) {
1063 assert(M.MinSrc >= 0 && M.MaxSrc >= 0);
1064 for (
int I = 0,
E = M.Mask.size();
I !=
E; ++
I) {
1065 if (M.Mask[
I] >= 0 && M.Mask[
I]-
I != M.MinSrc)
1071 if (SM.MaxSrc - SM.MinSrc <
int(
HwLen)) {
1072 if (SM.MinSrc == 0 || SM.MinSrc ==
int(
HwLen) || !IsExtSubvector(SM)) {
1078 if (SM.MaxSrc <
int(
HwLen)) {
1079 memcpy(NewMask.
data(), SM.Mask.data(),
sizeof(int)*VecLen);
1082 if (SM.MinSrc >=
int(
HwLen)) {
1083 for (
int I = 0;
I != VecLen; ++
I) {
1092 int MinSrc = SM.MinSrc;
1093 if (SM.MaxSrc <
int(
HwLen)) {
1095 }
else if (SM.MinSrc >
int(
HwLen)) {
1097 MinSrc = SM.MinSrc -
HwLen;
1099 const SDLoc &dl(Results.InpNode);
1100 if (isUInt<3>(MinSrc) || isUInt<3>(
HwLen-MinSrc)) {
1101 bool IsRight = isUInt<3>(MinSrc);
1104 unsigned Opc = IsRight ? Hexagon::V6_valignbi
1105 : Hexagon::V6_vlalignbi;
1106 Results.push(Opc, Ty, {Vb, Va, S});
1109 Results.push(Hexagon::A2_tfrsi,
MVT::i32, {S});
1110 unsigned Top = Results.top();
1111 Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(Top)});
1113 for (
int I = 0;
I != VecLen; ++
I) {
1119 return OpRef::res(Results.top());
1122 if (Options & PackMux) {
1128 for (
int I = 0;
I != VecLen; ++
I) {
1132 if (M >=
int(
HwLen))
1143 return vmuxs(MuxBytes, Va, Vb, Results);
1149 OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
1152 unsigned HalfMask = 0;
1154 for (
int M : SM.Mask) {
1157 HalfMask |= (1u << (M >> LogHw));
1170 OpRef Inp[2] = { Va, Vb };
1171 OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) };
1173 uint8_t HalfIdx[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
1175 for (
unsigned I = 0;
I != 4; ++
I) {
1176 if ((HalfMask & (1u <<
I)) == 0)
1179 OpRef
Op = Inp[
I/2];
1180 Out[Idx] = (
I & 1) ? OpRef::hi(Op) : OpRef::lo(Op);
1184 int VecLen = SM.Mask.size();
1185 for (
int I = 0;
I != VecLen; ++
I) {
1188 uint8_t Idx = HalfIdx[M >> LogHw];
1189 assert(Idx == 0 || Idx == 1);
1195 return concat(Out[0], Out[1], Results);
1199 ResultStack &Results) {
1203 const SDLoc &dl(Results.InpNode);
1204 SDValue B = getVectorConstant(Bytes, dl);
1205 Results.push(Hexagon::V6_vd0, ByteTy, {});
1206 Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)});
1207 Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va});
1208 return OpRef::res(Results.top());
1212 ResultStack &Results) {
1214 size_t S = Bytes.
size() / 2;
1217 return concat(L, H, Results);
1220 OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1222 unsigned VecLen = SM.Mask.size();
1225 assert(
all_of(SM.Mask, [
this](
int M) { return M == -1 || M < int(HwLen); }));
1232 OpRef
P = perfect(SM, Va, Results);
1235 return butterfly(SM, Va, Results);
1238 OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb,
1239 ResultStack &Results) {
1244 OpRef
C = contracting(SM, Va, Vb, Results);
1248 int VecLen = SM.Mask.size();
1250 OpRef
P = packs(SM, Va, Vb, Results, NewMask);
1252 return shuffs1(ShuffleMask(NewMask),
P,
Results);
1257 OpRef L = shuffs1(ShuffleMask(MaskL), Va, Results);
1258 OpRef R = shuffs1(ShuffleMask(MaskR), Vb, Results);
1259 if (!L.isValid() || !R.isValid())
1263 for (
int I = 0;
I != VecLen; ++
I) {
1267 return vmuxs(Bytes, L, R, Results);
1270 OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1272 int VecLen = SM.Mask.size();
1280 OpRef
P = packs(SM, OpRef::lo(Va), OpRef::hi(Va), Results, PackedMask);
1282 ShuffleMask PM(PackedMask);
1283 OpRef
E = expanding(PM, P, Results);
1287 OpRef L = shuffs1(PM.lo(),
P,
Results);
1289 if (L.isValid() && H.isValid())
1290 return concat(L, H, Results);
1293 OpRef R = perfect(SM, Va, Results);
1298 OpRef L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va),
Results);
1299 OpRef
H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va),
Results);
1300 if (L.isValid() && H.isValid())
1301 return concat(L, H, Results);
1306 OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
1307 ResultStack &Results) {
1312 int VecLen = SM.Mask.size();
1314 OpRef
P = packp(SM, Va, Vb, Results, PackedMask);
1316 return shuffp1(ShuffleMask(PackedMask),
P,
Results);
1321 OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results);
1322 OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results);
1323 if (!L.isValid() || !R.isValid())
1328 for (
int I = 0;
I != VecLen; ++
I) {
1332 return vmuxp(Bytes, L, R, Results);
1337 template <
typename T>
1344 template <
typename T>
1345 struct NullifyingVector :
public T {
1347 NullifyingVector(
T &&V) :
T(V) {
1348 for (
unsigned i = 0, e =
T::size(); i != e; ++i) {
1349 SDNode *&N = T::operator[](i);
1354 auto F = Refs.
find(N);
1355 if (
F != Refs.
end())
1356 *
F->second =
nullptr;
1367 unsigned VecLen = Mask.
size();
1368 bool HavePairs = (2*
HwLen == VecLen);
1387 Deleter DUA(
DAG, AllNodes);
1392 for (
int I : Mask) {
1421 if (2*
HwLen == VecLen) {
1451 std::map<SDNode*,unsigned> NumOps;
1454 for (
unsigned I = 0;
I != SubNodes.
size(); ++
I) {
1458 if (AllNodes.
count(
Op.getNode()))
1463 NumOps.insert({S, OpN});
1468 for (
unsigned I = 0;
I != TmpQ.
size(); ++
I) {
1471 if (!SubNodes.
count(U))
1473 auto F = NumOps.find(U);
1481 NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.
takeVector());
1483 Deleter DUQ(
DAG, Queue);
1492 OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb,
1493 ResultStack &Results) {
1495 if (!Va.isValid() || !Vb.isValid())
1505 int VecLen = SM.Mask.size();
1506 std::pair<int,unsigned> Strip =
findStrip(SM.Mask, 1, VecLen);
1511 if (Strip.second != 1 && Strip.second != 2)
1527 int NextInMask = SM.Mask[Strip.second];
1532 if (NextInMask < VecLen) {
1534 if (Strip.first == 0 && Strip.second == 1 && NextInMask == 4) {
1537 for (
int I = 0;
I != N/4; ++
I)
1538 if (SM.Mask[
I] != 4*
I)
1540 for (
int I = 0;
I != N/4; ++
I)
1541 if (SM.Mask[
I+N/4] != 2 + 4*
I)
1543 for (
int I = 0;
I != N/4; ++
I)
1544 if (SM.Mask[
I+N/2] != N + 4*
I)
1546 for (
int I = 0;
I != N/4; ++
I)
1547 if (SM.Mask[
I+3*N/4] != N+2 + 4*
I)
1550 Results.push(Hexagon::V6_vdealb4w, ResTy, {Vb, Va});
1551 return OpRef::res(Results.top());
1556 int L = Strip.second;
1558 if (Strip.first != 0 && Strip.first != L)
1561 for (
int I = L;
I <
N;
I += L) {
1562 auto S =
findStrip(SM.Mask.drop_front(
I), 1, N-
I);
1565 if (S.first - Strip.first != 2*
I)
1568 if (S.second !=
unsigned(L))
1574 assert(Strip.first == 0 || Strip.first == L);
1575 using namespace Hexagon;
1577 Res.Opc = Strip.second == 1
1578 ? (Strip.first == 0 ? V6_vpackeb : V6_vpackob)
1579 : (Strip.first == 0 ? V6_vpackeh : V6_vpackoh);
1581 Res.Ops = { Vb, Va };
1583 return OpRef::res(Results.top());
1588 int L = Strip.second;
1589 std::pair<int,unsigned> PrevS = Strip;
1591 for (
int I = L;
I <
N;
I += L) {
1592 auto S =
findStrip(SM.Mask.drop_front(
I), 1, N-
I);
1593 if (S.second != PrevS.second)
1595 int Diff = Flip ? PrevS.first - S.first + 2*L
1596 : S.first - PrevS.first;
1604 assert(Strip.first == 0 || Strip.first == L);
1605 using namespace Hexagon;
1607 Res.Opc = Strip.second == 1
1608 ? (Strip.first == 0 ? V6_vshuffeb : V6_vshuffob)
1609 : (Strip.first == 0 ? V6_vshufeh : V6_vshufoh);
1611 Res.Ops = { Vb, Va };
1613 return OpRef::res(Results.top());
1616 OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1629 int VecLen = SM.Mask.size();
1630 assert(2*
HwLen ==
unsigned(VecLen) &&
"Expecting vector-pair type");
1632 std::pair<int,unsigned> Strip =
findStrip(SM.Mask, 1, VecLen);
1639 if (Strip.first != 0)
1643 if (Strip.second != 1 && Strip.second != 2)
1647 int L = Strip.second;
1650 for (
int I = 2*L;
I < 2*
N;
I += 2*L) {
1651 auto S =
findStrip(SM.Mask.drop_front(
I), 1, N-
I);
1652 if (S.second !=
unsigned(L))
1658 for (
int I = L;
I < 2*
N;
I += 2*L) {
1659 auto S =
findStrip(SM.Mask.drop_front(
I), 0, N-
I);
1660 if (S.first != -1 || S.second !=
unsigned(L))
1664 unsigned Opc = Strip.second == 1 ? Hexagon::V6_vunpackub
1665 : Hexagon::V6_vunpackuh;
1667 return OpRef::res(Results.top());
1670 OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1679 int VecLen = SM.Mask.size();
1681 unsigned LogLen =
Log2_32(VecLen);
1685 assert(LogLen == HwLog || LogLen == HwLog+1);
1686 bool Extend = (LogLen == HwLog);
1771 unsigned X = Mask[0] ^ Mask[Num/2];
1773 if ((Mask[0] & X) != 0)
1775 for (
unsigned I = 1;
I != Num/2; ++
I) {
1776 if (
unsigned(Mask[
I] ^ Mask[
I+Num/2]) !=
X)
1778 if ((Mask[
I] & X) != 0)
1787 for (
unsigned I = VecLen;
I >= 2;
I >>= 1) {
1789 unsigned X = XorPow2(SM.Mask,
I);
1793 for (
int J =
I; J < VecLen; J +=
I) {
1794 if (XorPow2(SM.Mask.slice(J,
I),
I) != X)
1826 std::set<CycleType> Cycles;
1827 std::set<unsigned> All;
1829 for (
unsigned I : Perm)
1834 auto canonicalize = [LogLen](
const CycleType &
C) -> CycleType {
1835 unsigned LogPos, N =
C.size();
1836 for (LogPos = 0; LogPos !=
N; ++LogPos)
1837 if (
C[LogPos] == LogLen-1)
1842 CycleType NewC(
C.begin()+LogPos,
C.end());
1843 NewC.append(
C.begin(),
C.begin()+LogPos);
1847 auto pfs = [](
const std::set<CycleType> &Cs,
unsigned Len) {
1852 const CycleType &
C = *Cs.begin();
1855 int D = Len - C.size();
1856 if (D != 0 && D != 1)
1859 bool IsDeal =
true, IsShuff =
true;
1860 for (
unsigned I = 1;
I != Len-
D; ++
I) {
1861 if (C[
I] != Len-1-
I)
1863 if (C[
I] !=
I-(1-D))
1867 assert(!(IsDeal || IsShuff) || IsDeal != IsShuff);
1868 static unsigned Deals[] = { Hexagon::V6_vdealb, Hexagon::V6_vdealh };
1869 static unsigned Shufs[] = { Hexagon::V6_vshuffb, Hexagon::V6_vshuffh };
1870 return IsDeal ? Deals[
D] : (IsShuff ? Shufs[
D] : 0);
1873 while (!All.empty()) {
1874 unsigned A = *All.begin();
1878 for (
unsigned B = Perm[A];
B != A;
B = Perm[
B]) {
1891 if (
unsigned(VecLen) ==
HwLen) {
1892 if (
unsigned SingleOpc = pfs(Cycles, LogLen)) {
1893 Results.push(SingleOpc, SingleTy, {Va});
1894 return OpRef::res(Results.top());
1899 if (
HwLen ==
unsigned(VecLen))
1902 for (
const CycleType &
C : Cycles) {
1903 unsigned First = (
C[0] == LogLen-1) ? 1 : 0;
1904 SwapElems.append(
C.begin()+First,
C.end());
1906 SwapElems.push_back(
C[0]);
1909 const SDLoc &dl(Results.InpNode);
1910 OpRef
Arg = !Extend ? Va
1911 :
concat(Va, OpRef::undef(SingleTy), Results);
1913 for (
unsigned I = 0,
E = SwapElems.size();
I !=
E; ) {
1914 bool IsInc =
I ==
E-1 || SwapElems[
I] < SwapElems[
I+1];
1915 unsigned S = (1u << SwapElems[
I]);
1917 while (++
I <
E-1 && IsInc == (SwapElems[
I] < SwapElems[
I+1]))
1918 S |= 1u << SwapElems[
I];
1921 S |= 1u << SwapElems[
I];
1926 Results.push(Hexagon::A2_tfrsi,
MVT::i32,
1928 Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd;
1930 Res.Ops = { OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1) };
1932 Arg = OpRef::res(Results.top());
1935 return !Extend ?
Arg : OpRef::lo(Arg);
1938 OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1951 PermNetwork::Controls
FC, RC;
1952 const SDLoc &dl(Results.InpNode);
1953 int VecLen = SM.Mask.size();
1955 for (
int M : SM.Mask) {
1956 if (M != -1 && M >= VecLen)
1961 ForwardDeltaNetwork FN(SM.Mask);
1963 SDValue Ctl = getVectorConstant(FC, dl);
1964 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)});
1965 return OpRef::res(Results.top());
1969 ReverseDeltaNetwork
RN(SM.Mask);
1971 SDValue Ctl = getVectorConstant(RC, dl);
1972 Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)});
1973 return OpRef::res(Results.top());
1977 BenesNetwork BN(SM.Mask);
1978 if (BN.run(FC, RC)) {
1979 SDValue CtlF = getVectorConstant(FC, dl);
1980 SDValue CtlR = getVectorConstant(RC, dl);
1981 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)});
1982 Results.push(Hexagon::V6_vrdelta, ResTy,
1983 {OpRef::res(-1), OpRef(CtlR)});
1984 return OpRef::res(Results.top());
1993 for (uint8_t
C : Data)
2004 dbgs() <<
"Starting " << __func__ <<
" on node:\n";
2009 assert(ResTy.isVector() && ResTy.getVectorElementType() ==
MVT::i8);
2011 auto *SN = cast<ShuffleVectorSDNode>(
N);
2012 std::vector<int>
Mask(SN->getMask().begin(), SN->getMask().end());
2014 for (
int &Idx : Mask)
2015 if (Idx != -1 && Idx < 0)
2018 unsigned VecLen = Mask.size();
2019 bool HavePairs = (2*
HwLen == VecLen);
2020 assert(ResTy.getSizeInBits() / 8 == VecLen);
2025 bool UseLeft =
false, UseRight =
false;
2026 for (
unsigned I = 0;
I != VecLen; ++
I) {
2029 unsigned Idx = Mask[
I];
2038 dbgs() <<
"VecLen=" << VecLen <<
" HwLen=" <<
HwLen <<
" UseLeft=" 2039 << UseLeft <<
" UseRight=" << UseRight <<
" HavePairs=" 2040 << HavePairs <<
'\n';
2043 if (!UseLeft && !UseRight) {
2051 Results.push(TargetOpcode::COPY, ResTy, {Vec0});
2052 Results.push(TargetOpcode::COPY, ResTy, {Vec1});
2053 OpRef Va = OpRef::res(Results.top()-1);
2054 OpRef Vb = OpRef::res(Results.top());
2056 OpRef Res = !HavePairs ? shuffs2(ShuffleMask(Mask), Va, Vb, Results)
2057 : shuffp2(ShuffleMask(Mask), Va, Vb, Results);
2059 bool Done = Res.isValid();
2062 Results.push(TargetOpcode::COPY, ResTy, {Res});
2063 materialize(Results);
2065 Done = scalarizeShuffle(Mask,
SDLoc(N), ResTy, Vec0, Vec1, N);
2070 dbgs() <<
"Unhandled shuffle:\n";
2085 if (
auto *CN = dyn_cast<ConstantSDNode>(RotV.
getNode())) {
2089 }
else if (isUInt<3>(S)) {
2112 void HexagonDAGToDAGISel::SelectHvxShuffle(
SDNode *N) {
2116 void HexagonDAGToDAGISel::SelectHvxRor(
SDNode *N) {
2120 void HexagonDAGToDAGISel::SelectHvxVAlign(
SDNode *N) {
2134 unsigned IntNo = cast<ConstantSDNode>(N->
getOperand(1))->getZExtValue();
2140 Opcode = Hexagon::V6_vgathermhq_pseudo;
2144 Opcode = Hexagon::V6_vgathermwq_pseudo;
2148 Opcode = Hexagon::V6_vgathermhwq_pseudo;
2154 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2157 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2159 ReplaceNode(N, Result);
2171 unsigned IntNo = cast<ConstantSDNode>(N->
getOperand(1))->getZExtValue();
2177 Opcode = Hexagon::V6_vgathermh_pseudo;
2181 Opcode = Hexagon::V6_vgathermw_pseudo;
2185 Opcode = Hexagon::V6_vgathermhw_pseudo;
2191 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2194 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2196 ReplaceNode(N, Result);
2200 unsigned IID = cast<ConstantSDNode>(N->
getOperand(0))->getZExtValue();
2207 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry,
SDLoc(N), VTs, Ops);
2214 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry,
SDLoc(N), VTs, Ops);
2221 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry,
SDLoc(N), VTs, Ops);
2228 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry,
SDLoc(N), VTs, Ops);
2234 ReplaceUses(N, Result);
2237 CurDAG->RemoveDeadNode(N);
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&... Ranges)
Concatenated range across two or more ranges.
Pass interface - Implemented by all 'passes'.
const_iterator end(StringRef path)
Get end iterator over path.
const HexagonTargetLowering & Lower
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
This class represents lattice values for constants.
size_type size() const
Determine the number of elements in the SetVector.
static MVT getVectorVT(MVT VT, unsigned NumElements)
HexagonDAGToDAGISel & ISel
Implements a dense probed hash-table based set.
void push_back(const T &Elt)
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void selectVAlign(SDNode *N)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
unsigned getVectorNumElements() const
Function Alias Analysis Results
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
static void dump(StringRef Title, SpillInfo const &Spills)
static bool isPermutation(ArrayRef< int > Mask)
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
iterator end()
Get an iterator to the end of the SetVector.
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
static void splitMask(ArrayRef< int > Mask, MutableArrayRef< int > MaskL, MutableArrayRef< int > MaskR)
bool hasOneUse() const
Return true if there is exactly one use of this node.
A description of a memory reference used in the backend.
const HexagonInstrInfo * TII
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
iterator_range< allnodes_iterator > allnodes()
unsigned getSizeInBits() const
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
MVT getSingleVT(MVT ElemTy) const
bool insert(const value_type &X)
Insert a new element into the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
virtual const TargetInstrInfo * getInstrInfo() const
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
ArrayRef< SDUse > ops() const
MVT getVectorElementType() const
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
TargetInstrInfo - Interface to description of machine instruction set.
iterator find(const_arg_type_t< KeyT > Val)
Error build(ArrayRef< Module *> Mods, SmallVector< char, 0 > &Symtab, StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc)
Fills in Symtab and StrtabBuilder with a valid symbol and string table for Mods.
static const HexagonTargetLowering & getHexagonLowering(SelectionDAG &G)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
constexpr bool isUInt< 8 >(uint64_t x)
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
This is an important class for using LLVM in a threaded context.
size_t size() const
size - Get the array size.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const SDValue & getOperand(unsigned Num) const
static const HexagonSubtarget & getHexagonSubtarget(SelectionDAG &G)
static std::pair< int, unsigned > findStrip(ArrayRef< int > A, int Inc, unsigned MaxLen)
std::pair< iterator, bool > insert(const ValueT &V)
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
void selectRor(SDNode *N)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const HexagonSubtarget & HST
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
BaseType
A given derived pointer can have multiple base pointers through phi/selects.
bool use_empty() const
Return true if there are no uses of this node.
static bool isUndef(ArrayRef< int > Mask)
void dump() const
Dump this node, for debugging.
const TargetLowering & getTargetLoweringInfo() const
print lazy value Lazy Value Info Printer Pass
Vector takeVector()
Clear the SetVector and return the underlying vector.
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Color
A "color", which is either even or odd.
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Represents one node in the SelectionDAG.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
iterator_range< use_iterator > uses()
amdgpu Simplify well known AMD library false Value Value * Arg
HvxSelector(HexagonDAGToDAGISel &HS, SelectionDAG &G)
ArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
unsigned getVectorLength() const
pointer data()
Return a pointer to the vector's buffer, even if empty().
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
const TargetSubtargetInfo & getSubtarget() const
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
static bool isIdentity(ArrayRef< int > Mask)
void SelectV65Gather(SDNode *N)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
unsigned getOpcode() const
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
void selectShuffle(SDNode *N)
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void dumpr() const
Dump (recursively) this node and its use-def subgraph.
MVT getPairVT(MVT ElemTy) const
A vector that has set insertion semantics.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
This class implements an extremely fast bulk output stream that can only output to a stream...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
void SelectHVXDualOutput(SDNode *N)
const SDValue & getOperand(unsigned i) const
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
void SelectV65GatherPred(SDNode *N)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg)
LLVMContext * getContext() const
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...