15 #define DEBUG_TYPE "hexagon-shuffle" 46 enum { MAX = 360360 };
50 HexagonBid() =
default;
54 bool isSold()
const {
return (Bid >= MAX); }
63 class HexagonUnitAuction {
69 HexagonUnitAuction(
unsigned cs = 0) : isSold(cs) {}
72 bool bid(
unsigned B) {
74 unsigned b = B & ~isSold;
79 Scores[i] += HexagonBid(b);
80 isSold |= Scores[i].isSold() << i;
92 const unsigned SlotWeight = 8;
93 const unsigned MaskWeight = SlotWeight - 1;
95 unsigned Key = ((1u << s) & Units) != 0;
100 if (Key == 0 || Units == 0 || (SlotWeight * s >= 32))
105 Weight = (1u << (SlotWeight * s)) * ((MaskWeight - Ctpop) << Cttz);
111 UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
121 (CPU ==
"hexagonv60")
123 :
UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
125 UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
129 UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
134 UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
136 UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
140 UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
155 setLanes((*TUL)[T].
second);
176 for (
unsigned i = 1; i < Lanes; ++i)
177 startBit = (startBit << 1) | startBit;
182 unsigned usedUnits) {
183 if (startIdx < hvxInsts.
size()) {
184 if (!hvxInsts[startIdx].Units)
186 for (
unsigned b = 0x1; b <= 0x8; b <<= 1) {
187 if ((hvxInsts[startIdx].Units & b) == 0)
189 unsigned allBits =
makeAllBits(b, hvxInsts[startIdx].Lanes);
190 if ((allBits & usedUnits) == 0) {
191 if (
checkHVXPipes(hvxInsts, startIdx + 1, usedUnits | allBits))
203 : Context(Context), MCII(MCII), STI(STI), ReportErrors(ReportErrors) {
223 }
jumpSlots[] = {{8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1}};
224 #define MAX_JUMP_SLOTS (sizeof(jumpSlots) / sizeof(jumpSlots[0])) 227 bool HasRestrictSlot1AOK =
false;
230 MCInst const &Inst = ISJ->getDesc();
232 HasRestrictSlot1AOK =
true;
233 RestrictLoc = Inst.
getLoc();
236 if (HasRestrictSlot1AOK)
238 MCInst const &Inst = ISJ->getDesc();
243 unsigned Units = ISJ->Core.getUnits();
247 "Instruction was restricted from being in slot 1"));
249 std::make_pair(RestrictLoc,
"Instruction can only be combine " 250 "with an ALU instruction in slot 1"));
251 ISJ->Core.setUnits(Units & ~2U);
258 bool HasRestrictNoSlot1Store =
false;
261 MCInst const &Inst = ISJ->getDesc();
263 HasRestrictNoSlot1Store =
true;
264 RestrictLoc = Inst.
getLoc();
267 if (HasRestrictNoSlot1Store) {
268 bool AppliedRestriction =
false;
270 MCInst const &Inst = ISJ->getDesc();
272 unsigned Units = ISJ->Core.getUnits();
274 AppliedRestriction =
true;
277 "Instruction was restricted from being in slot 1"));
278 ISJ->Core.setUnits(Units & ~2U);
282 if (AppliedRestriction)
284 RestrictLoc,
"Instruction does not allow a store in slot 1"));
296 const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1,
298 slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1;
301 unsigned slotLoadStore = slotFirstLoadStore;
304 unsigned memory = 0, loads = 0, load0 = 0,
stores = 0, store0 = 0, store1 = 0;
305 unsigned NonZCVIloads = 0, AllCVIloads = 0, CVIstores = 0;
308 unsigned pSlot3Cnt = 0;
311 std::vector<iterator> foundBranches;
312 unsigned reservedSlots = 0;
330 foundBranches.push_back(ISJ);
345 if (ISJ->Core.getUnits() == slotSingleLoad ||
349 foundBranches.push_back(ISJ);
364 if (ISJ->Core.getUnits() == slotSingleStore ||
377 foundBranches.push_back(ISJ);
383 if (ISJ->Core.getUnits() == slotSingleLoad ||
397 foundBranches.push_back(ISJ);
404 foundBranches.push_back(ISJ);
406 foundBranches.push_back(ISJ);
408 foundBranches.push_back(ISJ);
410 foundBranches.push_back(ISJ);
418 const unsigned ZCVIloads = AllCVIloads - NonZCVIloads;
419 const bool ValidHVXMem =
420 NonZCVIloads <= 1 && ZCVIloads <= 1 && CVIstores <= 1;
421 if ((load0 > 1 || store0 > 1 || !ValidHVXMem) ||
422 (duplex > 1 || (duplex && memory))) {
429 bool bOnlySlot3 =
false;
433 if (!ISJ->Core.getUnits()) {
440 if (loads == 1 && loads == memory && memops == 0)
443 case Hexagon::V6_vgathermw:
444 case Hexagon::V6_vgathermh:
445 case Hexagon::V6_vgathermhw:
446 case Hexagon::V6_vgathermwq:
447 case Hexagon::V6_vgathermhq:
448 case Hexagon::V6_vgathermhwq:
452 ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleLoad);
458 if (slotLoadStore < slotLastLoadStore) {
461 llvm::Twine(
"invalid instruction packet: too many loads"));
465 ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore);
476 ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleStore);
478 if (slotLoadStore < slotLastLoadStore) {
484 ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore);
489 if (store1 &&
stores > 1) {
497 if (ISJ->Core.getUnits() == slotThree)
500 if (!ISJ->Core.getUnits()) {
508 bool validateSlots =
true;
509 if (foundBranches.size() > 1) {
510 if (foundBranches.size() > 2) {
530 HexagonUnitAuction AuctionCore(reservedSlots);
536 if (!AuctionCore.bid(
I->Core.getUnits()))
541 validateSlots =
false;
553 if (foundBranches.size() <= 1 && bOnlySlot3 ==
false && pSlot3Cnt == 1 &&
555 validateSlots =
true;
558 unsigned saveUnits = slot3ISJ->Core.getUnits();
559 slot3ISJ->Core.setUnits(saveUnits & slotThree);
561 HexagonUnitAuction AuctionCore(reservedSlots);
567 if (!AuctionCore.bid(
I->Core.getUnits()))
572 validateSlots =
false;
577 ISJ->Core.setUnits(saveUnits);
584 HexagonUnitAuction AuctionCore(reservedSlots);
589 if (!AuctionCore.bid(
I->Core.getUnits())) {
601 inst.
Units =
I->CVI.getUnits();
602 inst.
Lanes =
I->CVI.getLanes();
608 if (hvxInsts.
size() > 0) {
609 unsigned startIdx, usedUnits;
610 startIdx = usedUnits = 0x0;
636 unsigned slotSkip, slotWeight;
639 for (ISJ = ISK = Packet.
begin(), slotSkip = slotWeight = 0;
640 ISK != Packet.
end(); ++ISK, ++slotSkip)
641 if (slotSkip < nSlot - emptySlots)
646 slotWeight += ISK->Core.setWeight(HEXAGON_PACKET_SIZE - nSlot - 1);
651 std::stable_sort(ISJ, Packet.
end());
661 dbgs() << ISJ->CVI.getLanes();
std::string & operator+=(std::string &buffer, StringRef string)
static bool lessCore(const HexagonInstr &A, const HexagonInstr &B)
void setUnits(unsigned s)
This class represents lattice values for constants.
unsigned getOtherReservedSlots(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst const &MCI)
Return the slots this instruction consumes in addition to the slot(s) it can execute out of...
void push_back(const T &Elt)
void append(MCInst const &ID, MCInst const *Extender, unsigned S)
unsigned setWeight(unsigned s)
bool mayLoad() const
Return true if this instruction could possibly read memory.
void applySlotRestrictions()
bool isReturn() const
Return true if the instruction is a return.
void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges=None, ArrayRef< SMFixIt > FixIts=None, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static Optional< unsigned > getOpcode(ArrayRef< VPValue *> Values)
Returns the opcode of Values or ~0 if they do not all agree.
bool isRestrictNoSlot1Store(MCInstrInfo const &MCII, MCInst const &MCI)
Context object for machine code objects.
HexagonPacket::iterator iterator
raw_ostream & write_hex(unsigned long long N)
Output N in hexadecimal, without any prefix or padding.
const MCInst * getInst() const
Instances of this class represent a single low-level machine instruction.
HexagonCVIResource(TypeUnitsAndLanes *TUL, MCInstrInfo const &MCII, unsigned s, MCInst const *id)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
The instances of the Type class are immutable: once they are created, they are never changed...
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
MCSubtargetInfo const & STI
Interface to description of machine instruction set.
MCInstrDesc const & getDesc(MCInstrInfo const &MCII, MCInst const &MCI)
void reportError(SMLoc L, const Twine &Msg)
bool isMemReorderDisabled() const
void restrictNoSlot1Store()
const SourceMgr * getSourceManager() const
bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI)
static unsigned makeAllBits(unsigned startBit, unsigned Lanes)
unsigned countPopulation(T Value)
Count the number of set bits in a value.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
const MCOperand & getOperand(unsigned i) const
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool mayStore() const
Return true if this instruction could possibly modify memory.
std::pair< unsigned, unsigned > UnitsAndLanes
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
void reportError(Twine const &Msg)
HexagonShuffler(MCContext &Context, bool ReportErrors, MCInstrInfo const &MCII, MCSubtargetInfo const &STI)
Generic base class for all target subtargets.
static bool checkHVXPipes(const HVXInstsT &hvxInsts, unsigned startIdx, unsigned usedUnits)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isBranch(unsigned Opcode)
#define HEXAGON_PACKET_SIZE
static struct @435 jumpSlots[]
std::vector< std::pair< SMLoc, std::string > > AppliedRestrictions
unsigned getUnits(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst const &MCI)
Return the slots used by the insn.
static bool lessCVI(const HexagonInstr &A, const HexagonInstr &B)
StringRef - Represent a constant reference to a string, i.e.
unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI)
Return the Hexagon ISA class for the insn.
Represents a location in source code.
unsigned getOpcode() const
static void SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU)
bool isRestrictSlot1AOK(MCInstrInfo const &MCII, MCInst const &MCI)
Return whether the insn can be packaged only with an A-type insn in slot #1.
bool check()
Check that the packet is legal and enforce relative insn order.