34 #define DEBUG_TYPE "si-mode-register" 36 STATISTIC(NumSetregInserted,
"Number of setreg of mode register inserted.");
48 Status(
unsigned Mask,
unsigned Mode) : Mask(Mask), Mode(Mode) {
61 return Status(Mask & ~newMask, Mode & ~newMask);
67 unsigned NewMask = (Mask & S.
Mask) & (Mode ^ ~S.
Mode);
68 unsigned NewMode = (Mode & NewMask);
69 return Status(NewMask, NewMode);
78 return (Mask == S.
Mask) && (Mode == S.
Mode);
88 return !(Mask & S.
Mask) || isCompatible(S);
123 std::vector<std::unique_ptr<BlockData>> BlockInfo;
124 std::queue<MachineBasicBlock *> Phase2List;
159 "Insert required mode register values",
false,
false)
175 case AMDGPU::V_INTERP_P1LL_F16:
176 case AMDGPU::V_INTERP_P1LV_F16:
177 case AMDGPU::V_INTERP_P2_F16:
182 return DefaultStatus;
195 while (InstrMode.
Mask) {
196 unsigned Offset = countTrailingZeros<unsigned>(InstrMode.
Mask);
197 unsigned Width = countTrailingOnes<unsigned>(InstrMode.
Mask >>
Offset);
199 BuildMI(MBB, MI, 0, TII->get(AMDGPU::S_SETREG_IMM32_B32))
205 InstrMode.
Mask &= ~(((1 << Width) - 1) <<
Offset);
230 auto NewInfo = llvm::make_unique<BlockData>();
237 bool RequirePending =
true;
240 Status InstrMode = getInstructionMode(MI, TII);
241 if ((MI.
getOpcode() == AMDGPU::S_SETREG_B32) ||
242 (MI.
getOpcode() == AMDGPU::S_SETREG_IMM32_B32)) {
259 if (InsertionPoint) {
260 insertSetreg(MBB, InsertionPoint, TII, IPChange.
delta(NewInfo->Change));
261 InsertionPoint =
nullptr;
266 if (MI.
getOpcode() == AMDGPU::S_SETREG_IMM32_B32) {
272 RequirePending =
false;
273 NewInfo->Change = NewInfo->Change.merge(Setreg);
275 NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
277 }
else if (!NewInfo->Change.isCompatible(InstrMode)) {
280 if (InsertionPoint) {
285 if (RequirePending) {
289 NewInfo->FirstInsertionPoint = InsertionPoint;
290 NewInfo->Require = NewInfo->Change;
291 RequirePending =
false;
293 insertSetreg(MBB, InsertionPoint, TII,
294 IPChange.
delta(NewInfo->Change));
295 IPChange = NewInfo->Change;
298 InsertionPoint = &
MI;
300 NewInfo->Change = NewInfo->Change.merge(InstrMode);
304 InsertionPoint = &
MI;
305 IPChange = NewInfo->Change;
306 NewInfo->Change = NewInfo->Change.
merge(InstrMode);
310 if (RequirePending) {
313 NewInfo->FirstInsertionPoint = InsertionPoint;
314 NewInfo->Require = NewInfo->Change;
315 }
else if (InsertionPoint) {
317 insertSetreg(MBB, InsertionPoint, TII, IPChange.
delta(NewInfo->Change));
319 NewInfo->Exit = NewInfo->Change;
320 BlockInfo[MBB.getNumber()] = std::move(NewInfo);
333 BlockInfo[ThisBlock]->Pred = DefaultStatus;
339 BlockInfo[ThisBlock]->Pred = BlockInfo[PB.
getNumber()]->Exit;
341 for (P = std::next(P); P !=
E; P = std::next(P)) {
343 BlockInfo[ThisBlock]->Pred = BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[Pred->
getNumber()]->Exit);
346 Status TmpStatus = BlockInfo[ThisBlock]->Pred.
merge(BlockInfo[ThisBlock]->Change);
347 if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
348 BlockInfo[ThisBlock]->Exit = TmpStatus;
353 S !=
E; S = std::next(S)) {
367 if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
368 Status Delta = BlockInfo[ThisBlock]->Pred.
delta(BlockInfo[ThisBlock]->Require);
369 if (BlockInfo[ThisBlock]->FirstInsertionPoint)
370 insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);
386 processBlockPhase1(BB, TII);
392 Phase2List.push(&BB);
393 while (!Phase2List.empty()) {
394 processBlockPhase2(*Phase2List.front(),
TII);
401 processBlockPhase3(BB, TII);
405 return NumSetregInserted > 0;
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
MachineInstr & instr_front()
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
Status merge(const Status &S) const
const SIInstrInfo * getInstrInfo() const override
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.")
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
#define FP_ROUND_ROUND_TO_ZERO
bool isCombinable(Status &S)
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
bool isCompatible(Status &S)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
MachineInstr * FirstInsertionPoint
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define FP_ROUND_MODE_DP(x)
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
FunctionPass * createSIModeRegisterPass()
std::vector< MachineBasicBlock * >::iterator pred_iterator
Status intersect(const Status &S) const
succ_iterator succ_begin()
pred_iterator pred_begin()
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
#define FP_ROUND_ROUND_TO_NEAREST
static bool usesFPDPRounding(const MachineInstr &MI)
bool operator!=(const Status &S) const
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Status delta(const Status &S) const
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
Status mergeUnknown(unsigned newMask)
Status(unsigned Mask, unsigned Mode)
Representation of each machine instruction.
Interface definition for SIInstrInfo.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
LLVM Value Representation.
bool operator==(const Status &S) const
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
std::vector< MachineBasicBlock * >::iterator succ_iterator