#if defined(_MSC_VER) || defined(__MINGW32__)
#define _USE_MATH_DEFINES
#endif

#define DEBUG_TYPE "si-lower"

STATISTIC(NumTailCalls, "Number of tail calls");
94 "amdgpu-vgpr-index-mode",
95 cl::desc(
"Use GPR indexing mode instead of movrel for vector indexing"),
99 "amdgpu-frame-index-zero-bits",
100 cl::desc(
"High bits of frame index assumed to be zero"),
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
  unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
  for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
    if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg))
      return AMDGPU::SGPR0 + Reg;
  }
  llvm_unreachable("Cannot allocate sgpr");
}
  if (!Subtarget->hasBFI()) {

// ...

                                          EVT DestVT, EVT SrcVT) const {
unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
    unsigned &NumIntermediates, MVT &RegisterVT) const {
    // ...
    IntermediateVT = RegisterVT;
    NumIntermediates = NumElts;
    return NumIntermediates;
    // ...
    IntermediateVT = RegisterVT;
    NumIntermediates = 2 * NumElts;
    return NumIntermediates;
    // ...
    IntermediateVT = RegisterVT;
    NumIntermediates = (NumElts + 1) / 2;
    return NumIntermediates;
  // ...
  return TargetLowering::getVectorTypeBreakdownForCallingConv(
      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
}
  Type *ElementType = nullptr;
  // ...
  unsigned Pow2Elts = 0;
  unsigned ElementSize;
  switch (ElementType->getTypeID()) {
  // ...
    ElementSize = cast<IntegerType>(ElementType)->getBitWidth();
  // ...
  unsigned AdditionalElts = ElementSize == 16 ? 2 : 1;
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                          const CallInst &CI,
                                          MachineFunction &MF,
                                          unsigned IntrID) const {
  // ...
  if (RsrcIntr->IsImage) {
  // ...
  if (!Vol || !Vol->isZero())
938 Type *&AccessTy)
const {
957 bool SITargetLowering::isLegalFlatAddressingMode(
const AddrMode &AM)
const {
985 return isLegalFlatAddressingMode(AM);
988 return isLegalMUBUFAddressingMode(AM);
991 bool SITargetLowering::isLegalMUBUFAddressingMode(
const AddrMode &AM)
const {
1042 return isLegalMUBUFAddressingMode(AM);
1076 return isLegalMUBUFAddressingMode(AM);
1100 return isLegalFlatAddressingMode(AM);
                                                      bool *IsFast) const {
  // ...
    bool AlignedBy4 = (Align % 4 == 0);
    // ...
      *IsFast = AlignedBy4;
  // ...
    bool AlignedBy4 = Align >= 4;
    // ...
      *IsFast = AlignedBy4;
  // ...
      (Align % 4 == 0) : true;
                                       unsigned SrcAlign, bool IsMemset,
  // ...
  if (Size >= 16 && DstAlign >= 4)
    return MVT::v4i32;

  if (Size >= 8 && DstAlign >= 4)
    return MVT::v2i32;
                                           unsigned DestAS) const {
// ...
  const MemSDNode *MemNode = cast<MemSDNode>(N);
  // ...
  const Instruction *I = dyn_cast_or_null<Instruction>(Ptr);
// ...
                                           unsigned DestAS) const {
// ...
  const MemSDNode *MemNode = cast<MemSDNode>(N);
  std::tie(InputPtrReg, RC)
// ...
                                         const SDLoc &SL) const {
  // ...
    Val = getFPExtOrFPTrunc(DAG, Val, SL, VT);
SDValue SITargetLowering::lowerKernargMemParameter(
    SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Chain,
    uint64_t Offset, unsigned Align, bool Signed,
    const ISD::InputArg *Arg) const {
  // ...
  int64_t AlignDownOffset = alignDown(Offset, 4);
  int64_t OffsetDiff = Offset - AlignDownOffset;
  // ...
  SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, AlignDownOffset);
  // ...
  ArgVal = convertArgType(DAG, VT, MemVT, SL, ArgVal, Signed, Arg);
  // ...
  SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, Offset);
  // ...
  SDValue Val = convertArgType(DAG, VT, MemVT, SL, Load, Signed, Arg);
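// How the sub-dword path above works: a 1- or 2-byte kernel argument is
// fetched with a dword-aligned load (AlignDownOffset) and the requested
// bytes are then shifted out by 8 * OffsetDiff bits, so the memory access
// itself always stays naturally aligned.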
      ExtType, SL, VA.getLocVT(), Chain, FIN,
// ...
  for (unsigned I = 0, E = Ins.size(), PSInputNum = 0; I != E; ++I) {
    // ...
           "vector type argument should have been split");
    // ...
           "unexpected vector split in ps argument type");
    unsigned Reg = AMDGPU::VGPR0;
    MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
  // ...
    unsigned Reg = AMDGPU::VGPR1;
    MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
  // ...
    unsigned Reg = AMDGPU::VGPR2;
    MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
// ...
  if (RegIdx == ArgVGPRs.size()) {
  // ...
  unsigned Reg = ArgVGPRs[RegIdx];
  // ...
  assert(Reg != AMDGPU::NoRegister);
  // ...
  MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
// ...
                                unsigned NumArgRegs) {
  // ...
  if (RegIdx == ArgSGPRs.size())
  // ...
  unsigned Reg = ArgSGPRs[RegIdx];
  // ...
  assert(Reg != AMDGPU::NoRegister);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
  // ...
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
  // ...
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
  // ...
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
  // ...
    MF.addLiveIn(InputPtrReg, &AMDGPU::SGPR_64RegClass);
  // ...
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
  // ...
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
// ...
    MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
  // ...
    MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
  // ...
    MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
  // ...
    MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
// ...
  unsigned PrivateSegmentWaveByteOffsetReg;
  // ...
    PrivateSegmentWaveByteOffsetReg =
    // ...
    if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
  // ...
  MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
  CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
  if (HasStackObjects)
  // ...
      HasStackObjects = true;
  // ...
  bool RequiresStackAccess = HasStackObjects || MFI.hasCalls();
  // ...
  if (RequiresStackAccess) {
    // ...
      unsigned ReservedOffsetReg
    // ...
      unsigned ReservedBufferReg
      // ...
      unsigned ReservedOffsetReg
  // ...
  if (HasStackObjects && !MFI.hasCalls()) {
    // ...
    unsigned ReservedOffsetReg
// ...
      if (AMDGPU::SReg_64RegClass.contains(*I))
        RC = &AMDGPU::SGPR_64RegClass;
      else if (AMDGPU::SReg_32RegClass.contains(*I))
        RC = &AMDGPU::SGPR_32RegClass;
  for (auto *Exit : Exits)
    // ...
            TII->get(TargetOpcode::COPY), *I)
// ...
        Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
// ...
  if (ST.debuggerEmitPrologue())
    createDebuggerPrologueStackObjects(MF);
    CCInfo.AllocateStack(4, 4);
  // ...
      CCInfo.AllocateReg(AMDGPU::VGPR0);
      CCInfo.AllocateReg(AMDGPU::VGPR1);
    // ...
      if ((PsInputBits & 0x7F) == 0 ||
          ((PsInputBits & 0xF) == 0 &&
           (PsInputBits >> 11 & 1)))
  // ...
  } else if (IsKernel) {
  // ...
    CCInfo.AnalyzeFormalArguments(Splits, AssignFn);
  // ...
  const unsigned KernelArgBaseAlign = 16;
  // ...
  for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
    // ...
    if (IsEntryFunc && VA.isMemLoc()) {
      // ...
      unsigned Align = MinAlign(KernelArgBaseAlign, Offset);
      // ...
      SDValue Arg = lowerKernargMemParameter(
        DAG, VT, MemVT, DL, Chain, Offset, Align, Ins[i].Flags.isSExt(), &Ins[i]);
    // ...
    } else if (!IsEntryFunc && VA.isMemLoc()) {
      SDValue Val = lowerStackParameter(DAG, VA, DL, Chain, Arg);
2064 auto &ArgUsageInfo =
2068 unsigned StackArgSize = CCInfo.getNextStackOffset();
2071 return Chains.
empty() ? Chain :
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
// ...
    Chain = DAG.getCopyToReg(Chain, DL, PhysReturnAddrReg, ReturnAddrReg, Flag);
// ...
  for (unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.size(); I != E;
       ++I, ++RealRVLocIdx) {
    // ...
    SDValue Arg = OutVals[RealRVLocIdx];
// ...
      if (AMDGPU::SReg_64RegClass.contains(*I))
      // ...
      else if (AMDGPU::SReg_32RegClass.contains(*I))
// ...
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
2284 auto &ArgUsageInfo =
2287 = ArgUsageInfo.lookupFuncArgInfo(*CalleeFunc);
2308 for (
auto InputID : InputRegs) {
2312 std::tie(OutgoingArg, ArgRC) = CalleeArgInfo.getPreloadedValue(InputID);
2318 std::tie(IncomingArg, IncomingArgRC)
2320 assert(IncomingArgRC == ArgRC);
2331 assert(InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
2332 InputReg = getImplicitArgPtr(DAG, DL);
2336 RegsToPass.emplace_back(OutgoingArg->
getRegister(), InputReg);
  if (!CallerPreserved)
  // ...
  bool CCMatch = CallerCC == CalleeCC;
// ...
    if (Arg.hasByValAttr())
// ...
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
// ...
  CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, Ctx);
// ...
  return (Attr.getValueAsString() != "true");
// ...
  bool IsSibCall = false;
  bool IsThisReturn = false;
// ...
      "unsupported call to variadic function ");
// ...
      "unsupported indirect call to function ");
// ...
      "unsupported required tail call to function ");
// ...
      "unsupported call from graphics shader of function ");
// ...
        Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
    // ...
        "site marked musttail");
  // ...
  if (!TailCallOpt && IsTailCall)
// ...
    CCInfo.AllocateStack(4, 4);
  // ...
  CCInfo.AnalyzeCallOperands(Outs, AssignFn);
  // ...
  unsigned NumBytes = CCInfo.getNextStackOffset();
    RegsToPass.emplace_back(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
  // ...
    RegsToPass.emplace_back(AMDGPU::SGPR4, ScratchWaveOffsetReg);
// ...
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
       ++i, ++realArgIdx) {
    // ...
    SDValue Arg = OutVals[realArgIdx];
    // ...
      int32_t Offset = LocMemOffset;
      // ...
      unsigned OpSize = Flags.isByVal() ?
      // ...
        Offset = Offset + FPDiff;
      // ...
      if (Outs[i].Flags.isByVal()) {
        // ...
          Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
// ...
  if (!MemOpChains.empty())
// ...
  for (auto &RegToPass : RegsToPass) {
    // ...
                             RegToPass.second, InFlag);
// ...
    Chain = DAG.getCopyToReg(Chain, DL, PhysReturnAddrReg, ReturnAddrReg, InFlag);
// ...
  if (IsTailCall && !IsSibCall) {
// ...
  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);
// ...
    Ops.push_back(PhysReturnAddrReg);
// ...
  for (auto &RegToPass : RegsToPass) {
    // ...
                                  RegToPass.second.getValueType()));
// ...
  assert(Mask && "Missing call preserved mask for calling convention");
// ...
  Ops.push_back(InFlag);
// ...
  if (CallerSavedFP) {
    // ...
    Chain = DAG.getCopyToReg(Chain, DL, FPReg, CallerSavedFP, InFlag);
// ...
  uint64_t CalleePopBytes = NumBytes;
// ...
  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
                         InVals, IsThisReturn,
                         IsThisReturn ? OutVals[0] : SDValue());
    .Case("m0", AMDGPU::M0)
    .Case("exec", AMDGPU::EXEC)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
// ...
  if (Reg == AMDGPU::NoRegister) {
// ...
                             + StringRef(RegName) + "\" for subtarget."));
// ...
    case AMDGPU::EXEC_LO:
    case AMDGPU::EXEC_HI:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
    // ...
    case AMDGPU::FLAT_SCR:
2817 if (SplitPoint == BB->
end()) {
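// In outline, the helpers below build the "waterfall" loop used for indirect
// vector indexing when the index lives in a VGPR and may be divergent: each
// iteration uses V_READFIRSTLANE_B32 to pick the index of the first active
// lane, V_CMP_EQ_U32/S_AND_SAVEEXEC_B64 to restrict execution to the lanes
// sharing that index, performs the access, then S_XOR_B64 removes those
// lanes from EXEC and S_CBRANCH_EXECNZ loops until every lane is serviced.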
                                 unsigned InitSaveExecReg,
                                 // ...
                                 bool IsIndirectSrc) {
  // ...
  BuildMI(LoopBB, I, DL, TII->get(TargetOpcode::PHI), PhiReg)
  // ...
  BuildMI(LoopBB, I, DL, TII->get(TargetOpcode::PHI), PhiExec)
    .addReg(InitSaveExecReg)
  // ...
  BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentIdxReg)
  // ...
  BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e64), CondReg)
    .addReg(CurrentIdxReg)
  // ...
  BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), NewExec)
  // ...
  if (UseGPRIdxMode) {
    // ...
      IdxReg = CurrentIdxReg;
    // ...
      BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), IdxReg)
    // ...
    unsigned IdxMode = IsIndirectSrc ?
      AMDGPU::VGPRIndexMode::SRC0_ENABLE : AMDGPU::VGPRIndexMode::DST_ENABLE;
    // ...
    BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
  } else {
    // ...
      BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
    // ...
      BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
  }
  // ...
  BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
    .addReg(AMDGPU::EXEC)
  // ...
  BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))

// ...

                                      unsigned InitResultReg,
                                      // ...
                                      bool IsIndirectSrc) {
  // ...
  BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), TmpExec);
  // ...
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64), SaveExec)
    .addReg(AMDGPU::EXEC);
  // ...
  MF->insert(MBBI, LoopBB);
  MF->insert(MBBI, RemainderBB);
  // ...
                                      InitResultReg, DstReg, PhiReg, TmpExec,
                                      Offset, UseGPRIdxMode, IsIndirectSrc);
  // ...
  BuildMI(*RemainderBB, First, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
2996 static std::pair<unsigned, int>
3001 int NumElts = TRI.getRegSizeInBits(*SuperRC) / 32;
3005 if (Offset >= NumElts || Offset < 0)
3006 return std::make_pair(AMDGPU::sub0, Offset);
3008 return std::make_pair(AMDGPU::sub0 + Offset, 0);
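// Example: for a 128-bit register class (NumElts == 4), a constant offset of
// 2 folds entirely into the subregister index and yields (sub2, 0), while an
// out-of-range offset such as 7 is left for the dynamic path as (sub0, 7).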
                               bool IsIndirectSrc) {
  // ...
  if (UseGPRIdxMode) {
    unsigned IdxMode = IsIndirectSrc ?
      AMDGPU::VGPRIndexMode::SRC0_ENABLE : AMDGPU::VGPRIndexMode::DST_ENABLE;
    // ...
      BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
    // ...
      BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), Tmp)
      // ...
      BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
  } else {
    // ...
      BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
    // ...
      BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
  }
  std::tie(SubReg, Offset)
    = computeIndirectRegAndOffset(TRI, VecRC, SrcReg, Offset);
  // ...
  if (UseGPRIdxMode) {
    // ...
    BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst)
    // ...
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
  } else {
    BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
    // ...
  }
  // ...
  BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), InitReg);
  // ...
                                              Offset, UseGPRIdxMode, true);
  // ...
  if (UseGPRIdxMode) {
    BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst)
    // ...
    BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
  } else {
    BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
    // ...
  }

// ...

  switch (TRI.getRegSizeInBits(*VecRC)) {
  case 32:
    return AMDGPU::V_MOVRELD_B32_V1;
  case 64:
    return AMDGPU::V_MOVRELD_B32_V2;
  case 128:
    return AMDGPU::V_MOVRELD_B32_V4;
  case 256:
    return AMDGPU::V_MOVRELD_B32_V8;
  case 512:
    return AMDGPU::V_MOVRELD_B32_V16;
3184 if (Idx->
getReg() == AMDGPU::NoRegister) {
3190 BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dst)
3203 if (UseGPRIdxMode) {
3204 BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_indirect))
3211 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
3215 BuildMI(MBB, I, DL, MovRelDesc)
3219 .addImm(SubReg - AMDGPU::sub0);
3234 Offset, UseGPRIdxMode,
false);
3237 if (UseGPRIdxMode) {
3238 BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOV_B32_indirect))
3244 BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
3248 BuildMI(*LoopBB, InsPt, DL, MovRelDesc)
3252 .
addImm(SubReg - AMDGPU::sub0);
  case AMDGPU::S_ADD_U64_PSEUDO:
  case AMDGPU::S_SUB_U64_PSEUDO: {
    // ...
      Src0, &AMDGPU::SReg_64RegClass, AMDGPU::sub0, &AMDGPU::SReg_32_XM0RegClass);
    // ...
      Src0, &AMDGPU::SReg_64RegClass, AMDGPU::sub1, &AMDGPU::SReg_32_XM0RegClass);
    // ...
      Src1, &AMDGPU::SReg_64RegClass, AMDGPU::sub0, &AMDGPU::SReg_32_XM0RegClass);
    // ...
      Src1, &AMDGPU::SReg_64RegClass, AMDGPU::sub1, &AMDGPU::SReg_32_XM0RegClass);

    bool IsAdd = (MI.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);

    unsigned LoOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
    unsigned HiOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
    BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0)
    // ...
    BuildMI(*BB, MI, DL, TII->get(HiOpc), DestSub1)
    // ...
    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest.getReg())
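// The scalar ALU has no 64-bit add/sub, so the pseudo is split into a low
// S_ADD_U32/S_SUB_U32 that defines SCC and a high S_ADDC_U32/S_SUBB_U32 that
// consumes it as the carry/borrow, with REG_SEQUENCE rejoining the halves.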
  case AMDGPU::SI_INIT_M0: {
    // ...
            TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
    // ...
  }
  case AMDGPU::SI_INIT_EXEC:
  // ...
  case AMDGPU::SI_INIT_EXEC_FROM_INPUT: {
    // ...
      if (I->getOpcode() != TargetOpcode::COPY ||
          I->getOperand(0).getReg() != InputReg)
    // ...
      FirstMI = &*++BB->begin();
    // ...
      I->removeFromParent();
    // ...
    BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_BFE_U32), CountReg)
  case AMDGPU::GET_GROUPSTATICSIZE: {
    // ...
    BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
    // ...
  }
  case AMDGPU::SI_INDIRECT_SRC_V1:
  case AMDGPU::SI_INDIRECT_SRC_V2:
  case AMDGPU::SI_INDIRECT_SRC_V4:
  case AMDGPU::SI_INDIRECT_SRC_V8:
  case AMDGPU::SI_INDIRECT_SRC_V16:
  // ...
  case AMDGPU::SI_INDIRECT_DST_V1:
  case AMDGPU::SI_INDIRECT_DST_V2:
  case AMDGPU::SI_INDIRECT_DST_V4:
  case AMDGPU::SI_INDIRECT_DST_V8:
  case AMDGPU::SI_INDIRECT_DST_V16:
  // ...
  case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
  case AMDGPU::SI_KILL_I1_PSEUDO:
  // ...
  case AMDGPU::V_CNDMASK_B64_PSEUDO: {
    // ...
    BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy)
    // ...
    BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
      .addReg(Src0, 0, AMDGPU::sub0)
      .addReg(Src1, 0, AMDGPU::sub0)
    // ...
    BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
      .addReg(Src0, 0, AMDGPU::sub1)
      .addReg(Src1, 0, AMDGPU::sub1)
    // ...
    BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), Dst)
    // ...
  }
  case AMDGPU::SI_BR_UNDEF: {
  // ...
  }
  case AMDGPU::ADJCALLSTACKUP:
  case AMDGPU::ADJCALLSTACKDOWN: {
  // ...
  }
  case AMDGPU::SI_CALL_ISEL:
  case AMDGPU::SI_TCRETURN_ISEL: {
    // ...
    if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
      MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_CALL), ReturnAddrReg)
        // ...
        .addGlobalAddress(G);
    } else {
      MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_TCRETURN))
        // ...
        .addGlobalAddress(G);
    }
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() || Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    // ...
    return LowerTrig(Op, DAG);
  case ISD::FDIV:
    return LowerFDIV(Op, DAG);
  // ...
    return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::FP_ROUND:
    return lowerFP_ROUND(Op, DAG);
  case ISD::TRAP:
    return lowerTRAP(Op, DAG);
  case ISD::DEBUGTRAP:
    return lowerDEBUGTRAP(Op, DAG);
  // ...
    return lowerFMINNUM_FMAXNUM(Op, DAG);
SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
                                              MemSDNode *M,
                                              SelectionDAG &DAG,
                                              ArrayRef<SDValue> Ops,
                                              bool IsIntrinsic) const {
  // ...
  EVT EquivLoadVT = LoadVT;
  if (Unpacked && LoadVT.isVector()) {
// ...
  if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
      CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE)
// ...
  if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE ||
      CondCode > FCmpInst::Predicate::LAST_FCMP_PREDICATE) {
// ...
  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
  EVT SelectVT = NewVT;
  // ...
  if (NewVT != SelectVT)
// ...
    if (I.getUse().get() != Value)
    // ...
    if (I->getOpcode() == Opcode)
// ...
unsigned SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
  // ...
  switch (cast<ConstantSDNode>(Intr->getOperand(1))->getZExtValue()) {
// ...
void SITargetLowering::createDebuggerPrologueStackObjects(
    MachineFunction &MF) const {
  // ...
  for (unsigned i = 0; i < 3; ++i) {
bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
// ...
}

bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
  // ...
         !shouldEmitFixup(GV) &&
  // ...
}

bool SITargetLowering::shouldEmitPCReloc(const GlobalValue *GV) const {
  return !shouldEmitFixup(GV) && !shouldEmitGOTReloc(GV);
}
  unsigned CFNode = isCFIntrinsic(Intr);
// ...
  for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) {
// ...
         "Do not know how to custom lower FP_ROUND for non-f16 type");
  // ...
  EVT SrcVT = Src.getValueType();
  assert(UserSGPR != AMDGPU::NoRegister);
  // ...
    DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
// ...
    "debugtrap handler not supported",
// ...
SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
                                             SelectionDAG &DAG) const {
  // ...
  assert(UserSGPR != AMDGPU::NoRegister);
  // ...
    DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
// ...
                     NonNull, Ptr, SegmentNullPtr);
4326 if (NumElts == 4 && EltSize == 16 && KIdx) {
4337 unsigned Idx = KIdx->getZExtValue();
4338 bool InsertLo = Idx < 2;
4340 InsertLo ? LoVec : HiVec,
4353 if (isa<ConstantSDNode>(Idx))
4379 DAG.
getNOT(SL, BFM, IntVT), BCVec);
4403 if (
SDValue Combined = performExtractVectorEltCombine(Op.
getNode(), DCI))
4487 const SDLoc &DL,
unsigned Offset,
EVT PtrVT,
4523 GAFlags : GAFlags + 1);
4541 if (shouldEmitFixup(GV))
4543 else if (shouldEmitPCReloc(GV))
4581 unsigned Offset)
const {
4593 "non-hsa intrinsic with hsa target",
4602 "intrinsic not supported on subtarget",
4614 if (Elts.
size() == 1) {
4617 }
else if (Elts.
size() == 2) {
4620 }
else if (Elts.
size() <= 4) {
4623 }
else if (Elts.
size() <= 8) {
4633 for (
unsigned i = 0; i < Elts.
size(); ++i) {
4639 for (
unsigned i = Elts.
size(); i < NumElts; ++i)
  if (!CachePolicyConst)
  // ...
  uint64_t Value = CachePolicyConst->getZExtValue();
  SDLoc DL(CachePolicy);
  // ...
    Value &= ~(uint64_t)0x1;
  // ...
    Value &= ~(uint64_t)0x2;
                                 bool IsTexFail, bool Unpacked, bool IsD16,
                                 int DMaskPop, int NumVDataDwords,
// ...
  EVT ReqRetVT = ResultTypes[0];
  // ...
  EVT AdjEltVT = Unpacked && IsD16 ? MVT::i32 : ReqRetEltVT;
  EVT AdjVT = Unpacked ? ReqRetNumElts > 1 ?
    EVT::getVectorVT(Context, AdjEltVT, ReqRetNumElts)
    // ...
  if (IsD16 && !Unpacked)
    NumElts = NumVDataDwords << 1;
  else
    NumElts = NumVDataDwords;
  // ...
  bool V8F16Special = false;
  // ...
    ReqRetNumElts >>= 1;
    V8F16Special = true;
  // ...
  int ExtraElts = ReqRetNumElts - DMaskPop;
  // ...
  if (ReqRetNumElts > 1) {
    // ...
    if (IsD16 && Unpacked)
  // ...
    PreTFCRes = BVElts[0];
                         SDValue *LWE, bool &IsTexFail) {
  // ...
  if (!TexFailCtrlConst)
  // ...
  uint64_t Value = TexFailCtrlConst->getZExtValue();
  // ...
  SDLoc DL(TexFailCtrlConst);
  // ...
    Value &= ~(uint64_t)0x1;
  // ...
    Value &= ~(uint64_t)0x2;
SDValue SITargetLowering::lowerImage(SDValue Op,
                                     const AMDGPU::ImageDimIntrinsicInfo *Intr,
                                     SelectionDAG &DAG) const {
  // ...
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
    AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
  // ...
  const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
    AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
  unsigned IntrOpcode = Intr->BaseOpcode;
  // ...
  bool AdjustRetType = false;
  // ...
  unsigned DMaskLanes = 0;

  if (BaseOpcode->Atomic) {
    // ...
    if (BaseOpcode->AtomicX2) {
      // ...
      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
      // ...
    } else {
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;
    }
  } else {
    unsigned DMaskIdx = BaseOpcode->Store ? 3 : isa<MemSDNode>(Op) ? 2 : 1;
    // ...
    DMask = DMaskConst->getZExtValue();
    // ...
    if (BaseOpcode->Store) {
      // ...
          !BaseOpcode->HasD16)
      // ...
        VData = handleD16VData(VData, DAG);
    } else {
      // ...
      MVT LoadVT = ResultTypes[0].getSimpleVT();
      // ...
          !BaseOpcode->HasD16)
      // ...
          (!LoadVT.isVector() && DMaskLanes > 1))
      // ...
        NumVDataDwords = (DMaskLanes + 1) / 2;
      // ...
        NumVDataDwords = DMaskLanes;

      AdjustRetType = true;
    }

    AddrIdx = DMaskIdx + 1;
  }
4865 unsigned NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0;
4866 unsigned NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0;
4867 unsigned NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0;
4868 unsigned NumVAddrs = BaseOpcode->NumExtraArgs + NumGradients +
4870 unsigned NumMIVAddrs = NumVAddrs;
4875 if (LZMappingInfo) {
4876 if (
auto ConstantLod =
4877 dyn_cast<ConstantFPSDNode>(Op.
getOperand(AddrIdx+NumVAddrs-1))) {
4878 if (ConstantLod->isZero() || ConstantLod->isNegative()) {
4879 IntrOpcode = LZMappingInfo->LZ;
4886 unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
4890 ST->hasFeature(AMDGPU::FeatureR128A16)) {
4893 for (
unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) {
4902 if (((i + 1) >= (AddrIdx + NumMIVAddrs)) ||
4903 ((NumGradients / 2) % 2 == 1 &&
4904 (i == DimIdx + (NumGradients / 2) - 1 ||
4905 i == DimIdx + NumGradients - 1))) {
4918 for (
unsigned i = 0; i < NumMIVAddrs; ++i)
4928 if (!BaseOpcode->Sampler) {
4930 CtrlIdx = AddrIdx + NumVAddrs + 1;
4937 Unorm = UnormConst->getZExtValue() ? True : False;
4938 CtrlIdx = AddrIdx + NumVAddrs + 3;
4944 bool IsTexFail =
false;
  if (!parseTexFail(TexFail, DAG, &TFE, &LWE, IsTexFail))
  // ...
    NumVDataDwords += 1;
    AdjustRetType = true;
  // ...
  if (AdjustRetType) {
    // ...
    if (DMaskLanes == 0 && !BaseOpcode->Store) {
      // ...
      if (isa<MemSDNode>(Op))
      // ...
    }

    EVT NewVT = NumVDataDwords > 1 ?
    // ...
    ResultTypes[0] = NewVT;
    if (ResultTypes.size() == 3) {
      // ...
      ResultTypes.erase(&ResultTypes[1]);
    }
  }
  // ...
  if (BaseOpcode->Atomic) {
  // ...
  if (BaseOpcode->Store || BaseOpcode->Atomic)
  // ...
  if (BaseOpcode->Sampler)
  // ...
              ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
  // ...
  Ops.push_back(DimInfo->DA ? True : False);
  if (BaseOpcode->HasD16)
  // ...
  if (isa<MemSDNode>(Op))
  // ...
                                   NumVDataDwords, NumVAddrDwords);
  // ...
                                   NumVDataDwords, NumVAddrDwords);
  // ...
  if (auto MemOp = dyn_cast<MemSDNode>(Op)) {
  // ...
  if (BaseOpcode->AtomicX2) {
  // ...
  } else if (!BaseOpcode->Store) {
    // ...
                     OrigResultTypes, IsTexFail,
    // ...
                     DMaskLanes, NumVDataDwords, DL,
  unsigned NumLoads = 1;
  // ...
  if (NumElts == 8 || NumElts == 16) {
    NumLoads = NumElts == 16 ? 4 : 2;
  // ...
  unsigned CachePolicy = cast<ConstantSDNode>(GLC)->getZExtValue();
  // ...
  setBufferOffsets(Offset, DAG, &Ops[3], NumLoads > 1 ? 16 * NumLoads : 4);
  // ...
  uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
  for (unsigned i = 0; i < NumLoads; ++i) {
5124 unsigned IntrinsicID = cast<ConstantSDNode>(Op.
getOperand(0))->getZExtValue();
5128 switch (IntrinsicID) {
5132 return getPreloadedValue(DAG, *MFI, VT,
5139 MF.
getFunction(),
"unsupported hsa intrinsic without hsa target",
5147 return getPreloadedValue(DAG, *MFI, VT, RegID);
5151 return getImplicitArgPtr(DAG, DL);
5152 return getPreloadedValue(DAG, *MFI, VT,
5156 return getPreloadedValue(DAG, *MFI, VT,
5193 return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.
getEntryNode(),
5199 return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.
getEntryNode(),
5205 return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.
getEntryNode(),
5211 return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.
getEntryNode(),
5217 return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.
getEntryNode(),
5223 return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.
getEntryNode(),
5229 return lowerImplicitZextParam(DAG, Op,
MVT::i16,
5235 return lowerImplicitZextParam(DAG, Op,
MVT::i16,
5241 return lowerImplicitZextParam(DAG, Op,
MVT::i16,
5245 return getPreloadedValue(DAG, *MFI, VT,
5249 return getPreloadedValue(DAG, *MFI, VT,
5253 return getPreloadedValue(DAG, *MFI, VT,
5259 MFI->getArgInfo().WorkItemIDX);
5264 MFI->getArgInfo().WorkItemIDY);
5269 MFI->getArgInfo().WorkItemIDZ);
5270 case SIIntrinsic::SI_load_const: {
5277 unsigned Cache = cast<ConstantSDNode>(Op.
getOperand(3))->getZExtValue();
5282 return lowerFDIV_FAST(Op, DAG);
5313 MF.
getFunction(),
"intrinsic not supported on subtarget",
5359 Denominator, Numerator);
  if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
  // ...
    return lowerImage(Op, ImageDimIntr, DAG);
// ...
  unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  // ...
    unsigned ShaderType;
    // ...
    if (WaveDone && !WaveRelease)
    // ...
    unsigned Offset0 = OrderedCountIndex << 2;
    unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
    // ...
    unsigned Offset = Offset0 | (Offset1 << 8);
5539 unsigned Glc = cast<ConstantSDNode>(Op.
getOperand(5))->getZExtValue();
5540 unsigned Slc = cast<ConstantSDNode>(Op.
getOperand(6))->getZExtValue();
5542 if (
auto Idx = dyn_cast<ConstantSDNode>(Op.
getOperand(3)))
5543 IdxEn = Idx->getZExtValue() != 0;
5555 setBufferOffsets(Op.
getOperand(4), DAG, &Ops[3]);
5561 auto *M = cast<MemSDNode>(
Op);
5564 if (LoadVT.getScalarType() ==
MVT::f16)
5589 auto *M = cast<MemSDNode>(
Op);
5592 if (LoadVT.getScalarType() ==
MVT::f16)
5617 auto *M = cast<MemSDNode>(
Op);
5620 if (LoadVT.getScalarType() ==
MVT::f16)
5630 unsigned Dfmt = cast<ConstantSDNode>(Op.
getOperand(7))->getZExtValue();
5631 unsigned Nfmt = cast<ConstantSDNode>(Op.
getOperand(8))->getZExtValue();
5632 unsigned Glc = cast<ConstantSDNode>(Op.
getOperand(9))->getZExtValue();
5633 unsigned Slc = cast<ConstantSDNode>(Op.
getOperand(10))->getZExtValue();
5635 if (
auto Idx = dyn_cast<ConstantSDNode>(Op.
getOperand(3)))
5636 IdxEn = Idx->getZExtValue() != 0;
5649 if (LoadVT.getScalarType() ==
MVT::f16)
5673 if (LoadVT.getScalarType() ==
MVT::f16)
5697 if (LoadVT.getScalarType() ==
MVT::f16)
5714 unsigned Slc = cast<ConstantSDNode>(Op.
getOperand(6))->getZExtValue();
5716 if (
auto Idx = dyn_cast<ConstantSDNode>(Op.
getOperand(4)))
5717 IdxEn = Idx->getZExtValue() != 0;
5729 setBufferOffsets(Op.
getOperand(5), DAG, &Ops[4]);
5732 auto *M = cast<MemSDNode>(
Op);
5733 unsigned Opcode = 0;
5797 auto *M = cast<MemSDNode>(
Op);
5798 unsigned Opcode = 0;
5862 auto *M = cast<MemSDNode>(
Op);
5863 unsigned Opcode = 0;
5904 unsigned Slc = cast<ConstantSDNode>(Op.
getOperand(7))->getZExtValue();
5906 if (
auto Idx = dyn_cast<ConstantSDNode>(Op.
getOperand(5)))
5907 IdxEn = Idx->getZExtValue() != 0;
5920 setBufferOffsets(Op.
getOperand(6), DAG, &Ops[5]);
5922 auto *M = cast<MemSDNode>(
Op);
5942 auto *M = cast<MemSDNode>(
Op);
5962 auto *M = cast<MemSDNode>(
Op);
5969 if (
const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
5971 return lowerImage(Op, ImageDimIntr, DAG);
6007 unsigned IntrinsicID = cast<ConstantSDNode>(Op.
getOperand(1))->getZExtValue();
6010 switch (IntrinsicID) {
6029 unsigned Opc = Done->isNullValue() ?
6089 VData = handleD16VData(VData, DAG);
6090 unsigned Dfmt = cast<ConstantSDNode>(Op.
getOperand(8))->getZExtValue();
6091 unsigned Nfmt = cast<ConstantSDNode>(Op.
getOperand(9))->getZExtValue();
6092 unsigned Glc = cast<ConstantSDNode>(Op.
getOperand(10))->getZExtValue();
6093 unsigned Slc = cast<ConstantSDNode>(Op.
getOperand(11))->getZExtValue();
6095 if (
auto Idx = dyn_cast<ConstantSDNode>(Op.
getOperand(4)))
6096 IdxEn = Idx->getZExtValue() != 0;
6120 VData = handleD16VData(VData, DAG);
6145 VData = handleD16VData(VData, DAG);
6171 VData = handleD16VData(VData, DAG);
6172 unsigned Glc = cast<ConstantSDNode>(Op.
getOperand(6))->getZExtValue();
6173 unsigned Slc = cast<ConstantSDNode>(Op.
getOperand(7))->getZExtValue();
6175 if (
auto Idx = dyn_cast<ConstantSDNode>(Op.
getOperand(4)))
6176 IdxEn = Idx->getZExtValue() != 0;
6188 setBufferOffsets(Op.
getOperand(5), DAG, &Ops[4]);
6202 VData = handleD16VData(VData, DAG);
6228 VData = handleD16VData(VData, DAG);
6250 if (
const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
6252 return lowerImage(Op, ImageDimIntr, DAG);
6265 std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
6268 const unsigned MaxImm = 4095;
6272 if ((C1 = dyn_cast<ConstantSDNode>(N0)))
6288 unsigned Overflow = ImmOffset & ~MaxImm;
6289 ImmOffset -= Overflow;
6290 if ((int32_t)Overflow < 0) {
6291 Overflow += ImmOffset;
6300 SDValue Ops[] = { N0, OverflowVal };
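// MUBUF/MTBUF instructions carry a 12-bit unsigned immediate offset, hence
// MaxImm == 4095: any constant offset above that is split into an in-range
// immediate plus an "overflow" part that has to travel in a register operand
// instead.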
void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
                                        SelectionDAG &DAG, SDValue *Offsets,
                                        unsigned Align) const {
  SDLoc DL(CombinedOffset);
  if (auto C = dyn_cast<ConstantSDNode>(CombinedOffset)) {
  // ...
    int Offset = cast<ConstantSDNode>(N1)->getSExtValue();
    // ...
                                Subtarget, Align)) {
  // ...
  Offsets[0] = CombinedOffset;
SDValue SITargetLowering::widenLoad(LoadSDNode *Ld,
                                    DAGCombinerInfo &DCI) const {
  // ...
  if ((MemVT.isSimple() && !DCI.isAfterLegalizeDAG()) ||
  // ...
         "unexpected vector extload");
  // ...
         "unexpected fp extload");
  // ...
  DCI.AddToWorklist(Cvt.getNode());
  // ...
  DCI.AddToWorklist(Cvt.getNode());
// ...
                              BasePtr, RealMemVT, MMO);
// ...
         "Custom lowering for non-i32 vectors hasn't been implemented.");
6494 if (!Op->
isDivergent() && Alignment >= 4 && NumElements < 32)
6507 Alignment >= 4 && NumElements < 32)
6518 if (NumElements > 4)
6531 if (NumElements > 2)
6536 if (NumElements > 4)
6548 if (NumElements > 2)
6608 if (CLHS->isExactlyValue(1.0)) {
6628 if (CLHS->isExactlyValue(-1.0)) {
6649 return DAG.
getNode(Opcode, SL, VT, A, B);
6670 return DAG.
getNode(Opcode, SL, VT, A, B, C);
6688 if (
SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
6742 if (
SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
6776 EnableDenormValue, BitField);
6787 ApproxRcp, One, NegDivScale0);
6796 NumeratorScaled, Mul);
6801 NumeratorScaled, Fma3);
6804 const SDValue DisableDenormValue =
6813 DisableDenorm, DAG.
getRoot());
6819 Fma4, Fma1, Fma3, Scale);
6826 return lowerFastUnsafeFDIV(Op, DAG);
6854 NegDivScale0, Mul, DivScale1);
6886 Fma4, Fma3, Mul, Scale);
6895 return LowerFDIV32(Op, DAG);
6898 return LowerFDIV64(Op, DAG);
6901 return LowerFDIV16(Op, DAG);
  unsigned NumElements = VT.getVectorNumElements();
  // ...
    if (NumElements > 4)
  // ...
    if (NumElements > 2)
    // ...
    if (NumElements > 4)
  // ...
        VT.getStoreSize() == 16)
    // ...
    if (NumElements > 2)
  // ...
      NumElements == 2 && VT.getStoreSize() == 8 &&
// ...
  SDValue Ops[] = { ChainIn, Addr, NewOld };
7040 DAGCombinerInfo &DCI)
const {
7056 if (DCI.isAfterLegalizeDAG() && SrcVT ==
MVT::i32) {
7059 DCI.AddToWorklist(Cvt.
getNode());
7084 DAGCombinerInfo &DCI)
const {
7129 DAGCombinerInfo &DCI)
const {
static bool bitOpWithConstantIsReducible(unsigned Opc, uint32_t Val) {
  return (Opc == ISD::AND && (Val == 0 || Val == 0xffffffff)) ||
         (Opc == ISD::OR && (Val == 0xffffffff || Val == 0)) ||
         (Opc == ISD::XOR && Val == 0);
}
SDValue SITargetLowering::splitBinaryBitConstantOp(
    DAGCombinerInfo &DCI, const SDLoc &SL,
    unsigned Opc, SDValue LHS,
    const ConstantSDNode *CRHS) const {
// ...
  uint32_t ZeroByteMask = 0;
  if (!(C & 0x000000ff)) ZeroByteMask |= 0x000000ff;
  if (!(C & 0x0000ff00)) ZeroByteMask |= 0x0000ff00;
  if (!(C & 0x00ff0000)) ZeroByteMask |= 0x00ff0000;
  if (!(C & 0xff000000)) ZeroByteMask |= 0xff000000;
  uint32_t NonZeroByteMask = ~ZeroByteMask;
  if ((NonZeroByteMask & C) != NonZeroByteMask)
// ...
    return (0x03020100 & ConstMask) | (0x0c0c0c0c & ~ConstMask);
// ...
    return (0x03020100 & ~ConstMask) | ConstMask;
// ...
    return uint32_t((0x030201000c0c0c0cull << C) >> 32);
// ...
    return uint32_t(0x0c0c0c0c03020100ull >> C);
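// These helpers build select masks for V_PERM_B32: 0x03020100 is the
// identity byte order for one 32-bit operand, and a select byte of 0x0c is
// the sentinel this combine uses for a lane whose value is known to be zero,
// which is why unused lanes are forced to 0x0c before two masks are merged.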
7265 DAGCombinerInfo &DCI)
const {
7266 if (DCI.isBeforeLegalize())
7278 = splitBinaryBitConstantOp(DCI,
SDLoc(N),
ISD::AND, LHS, CRHS))
7291 if (
auto *CShift = dyn_cast<ConstantSDNode>(LHS->
getOperand(1))) {
7292 unsigned Shift = CShift->getZExtValue();
7294 unsigned Offset = NB + Shift;
7295 if ((Offset & (Bits - 1)) == 0) {
7405 if (LHSMask != ~0u && RHSMask != ~0u) {
7408 if (LHSMask > RHSMask) {
7415 uint32_t LHSUsedLanes = ~(LHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
7416 uint32_t RHSUsedLanes = ~(RHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
7419 if (!(LHSUsedLanes & RHSUsedLanes) &&
7422 !(LHSUsedLanes == 0x0c0c0000 && RHSUsedLanes == 0x00000c0c)) {
7429 for (
unsigned I = 0;
I < 32;
I += 8) {
7431 if ((LHSMask & ByteSel) == 0x0c || (RHSMask & ByteSel) == 0x0c)
7432 Mask &= (0x0c <<
I) & 0xffffffff;
7437 uint32_t Sel = Mask | (LHSUsedLanes & 0x04040404);
7451 DAGCombinerInfo &DCI)
const {
7471 static const uint32_t MaxMask = 0x3ff;
7483 if (isa<ConstantSDNode>(RHS) && LHS.
hasOneUse() &&
7502 if (LHSMask != ~0u && RHSMask != ~0u) {
7505 if (LHSMask > RHSMask) {
7512 uint32_t LHSUsedLanes = ~(LHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
7513 uint32_t RHSUsedLanes = ~(RHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
7516 if (!(LHSUsedLanes & RHSUsedLanes) &&
7519 !(LHSUsedLanes == 0x0c0c0000 && RHSUsedLanes == 0x00000c0c)) {
7521 LHSMask &= ~RHSUsedLanes;
7522 RHSMask &= ~LHSUsedLanes;
7524 LHSMask |= LHSUsedLanes & 0x04040404;
7557 DCI.AddToWorklist(LowOr.
getNode());
7558 DCI.AddToWorklist(HiBits.
getNode());
7569 = splitBinaryBitConstantOp(DCI,
SDLoc(N),
ISD::OR, LHS, CRHS))
7577 DAGCombinerInfo &DCI)
const {
7588 = splitBinaryBitConstantOp(DCI,
SDLoc(N),
ISD::XOR, LHS, CRHS))
7652 SDValue SITargetLowering::performZeroExtendCombine(
SDNode *N,
7653 DAGCombinerInfo &DCI)
const {
7679 DAGCombinerInfo &DCI)
const {
7684 if (
const ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Mask)) {
7685 if (CMask->isNullValue())
7696 DAGCombinerInfo &DCI)
const {
7718 if (
auto *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
7719 auto F = CFP->getValueAPF();
7720 if (
F.isNaN() &&
F.isSignaling())
    unsigned IntrinsicID
      = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    // ...
    switch (IntrinsicID) {
// ...
SDValue SITargetLowering::getCanonicalConstantFP(
// ...
  return Op.isUndef() || isa<ConstantFPSDNode>(Op);
SDValue SITargetLowering::performFCanonicalizeCombine(
  SDNode *N,
  DAGCombinerInfo &DCI) const {
  // ...
    return getCanonicalConstantFP(DAG, SDLoc(N), VT, CFP->getValueAPF());
// ...
    for (unsigned I = 0; I != 2; ++I) {
      // ...
        NewElts[I] = getCanonicalConstantFP(DAG, SL, EltVT,
                                            CFP->getValueAPF());
// ...
    if (isa<ConstantFPSDNode>(NewElts[1]))
      NewElts[0] = isa<ConstantFPSDNode>(NewElts[1]) ?
    // ...
      NewElts[1] = isa<ConstantFPSDNode>(NewElts[0]) ?
// ...
  SDValue Canon1 = getCanonicalConstantFP(DAG, SL, VT, CRHS->getValueAPF());
  DCI.AddToWorklist(Canon0.getNode());
SDValue SITargetLowering::performIntMed3ImmCombine(
// ...
  return DAG.getNode(Med3Opc, SL, VT,
// ...
                                               DAGCombinerInfo &DCI) const {
// ...
    if (SDValue Med3 = performIntMed3ImmCombine(DAG, SDLoc(N), Op0, Op1, true))
  // ...
    if (SDValue Med3 = performIntMed3ImmCombine(DAG, SDLoc(N), Op0, Op1, false))
// ...
    if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1))
// ...
  return (CA->isExactlyValue(0.0) && CB->isExactlyValue(1.0)) ||
         (CA->isExactlyValue(1.0) && CB->isExactlyValue(0.0));
8194 DAGCombinerInfo &DCI)
const {
8218 if (isa<ConstantFPSDNode>(Src0) && !isa<ConstantFPSDNode>(Src1))
8221 if (isa<ConstantFPSDNode>(Src1) && !isa<ConstantFPSDNode>(Src2))
8224 if (isa<ConstantFPSDNode>(Src0) && !isa<ConstantFPSDNode>(Src1))
8235 DAGCombinerInfo &DCI)
const {
8243 SDValue SITargetLowering::performExtractVectorEltCombine(
8244 SDNode *N, DAGCombinerInfo &DCI)
const {
8266 if (Vec.
hasOneUse() && DCI.isBeforeLegalize()) {
8293 DCI.AddToWorklist(Elt0.
getNode());
8294 DCI.AddToWorklist(Elt1.
getNode());
8308 if (VecSize <= 256 && (VecSize > 64 || EltSize >= 32) &&
8325 if (!DCI.isBeforeLegalize())
8332 if (isa<MemSDNode>(Vec) &&
8336 VecSize % 32 == 0 &&
8340 unsigned BitIndex = Idx->getZExtValue() * EltSize;
8341 unsigned EltIdx = BitIndex / 32;
8342 unsigned LeftoverBitIdx = BitIndex % 32;
8346 DCI.AddToWorklist(Cast.
getNode());
8350 DCI.AddToWorklist(Elt.
getNode());
8353 DCI.AddToWorklist(Srl.
getNode());
8356 DCI.AddToWorklist(Trunc.
getNode());
SDValue
SITargetLowering::performInsertVectorEltCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
  // ...
  if (isa<ConstantSDNode>(Idx) ||
      VecSize > 256 || (VecSize <= 64 && EltSize < 32))
// ...
unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
                                          const SDNode *N0,
                                          const SDNode *N1) const {
8432 DAGCombinerInfo &DCI)
const {
8456 return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS,
false);
8463 return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS,
true);
8469 if (VT !=
MVT::i32 || !DCI.isAfterLegalizeDAG())
8491 return DAG.
getNode(Opc, SL, VTList, Args);
8496 if (!C || C->getZExtValue() != 0)
break;
8505 DAGCombinerInfo &DCI)
const {
8523 if (!C || C->getZExtValue() != 0)
8531 SDValue SITargetLowering::performAddCarrySubCarryCombine(
SDNode *N,
8532 DAGCombinerInfo &DCI)
const {
8538 if (!C || C->getZExtValue() != 0)
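// getMad64_32 above forms the MAD_I64_I32/MAD_U64_U32 node, i.e. a full
// 32 x 32 -> 64 bit multiply-add; the boolean argument selects the signed
// variant, which is why the combine is tried once for zero-extended and once
// for sign-extended multiplicands.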
8557 DAGCombinerInfo &DCI)
const {
8575 unsigned FusedOp = getFusedOpcode(DAG, N, LHS.
getNode());
8578 return DAG.
getNode(FusedOp, SL, VT, A, Two, RHS);
8587 unsigned FusedOp = getFusedOpcode(DAG, N, RHS.
getNode());
8590 return DAG.
getNode(FusedOp, SL, VT, A, Two, LHS);
8599 DAGCombinerInfo &DCI)
const {
8617 SDValue A = LHS.getOperand(0);
8619 unsigned FusedOp = getFusedOpcode(DAG, N, LHS.getNode());
8624 return DAG.
getNode(FusedOp, SL, VT, A, Two, NegRHS);
8634 unsigned FusedOp = getFusedOpcode(DAG, N, RHS.
getNode());
8637 return DAG.
getNode(FusedOp, SL, VT, A, NegTwo, LHS);
8646 DAGCombinerInfo &DCI)
const {
8705 if (Vec1 == Vec2 || Vec3 == Vec4)
8711 if ((Vec1 == Vec3 && Vec2 == Vec4) ||
8712 (Vec1 == Vec4 && Vec2 == Vec3)) {
8721 DAGCombinerInfo &DCI)
const {
SDValue SITargetLowering::performCvtF32UByteNCombine(
  SDNode *N,
  DAGCombinerInfo &DCI) const {
  // ...
          dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
    // ...
    unsigned SrcOffset = C->getZExtValue() + 8 * Offset;
    if (SrcOffset < 32 && SrcOffset % 8 == 0) {
// ...
                        !DCI.isBeforeLegalizeOps());
  // ...
  if (TLI.SimplifyDemandedBits(Src, Demanded, Known, TLO)) {
    DCI.CommitTargetLoweringOpt(TLO);
8860 DAGCombinerInfo &DCI)
const {
8891 return performAddCombine(N, DCI);
8893 return performSubCombine(N, DCI);
8896 return performAddCarrySubCarryCombine(N, DCI);
8898 return performFAddCombine(N, DCI);
8900 return performFSubCombine(N, DCI);
8902 return performSetCCCombine(N, DCI);
8913 return performMinMaxCombine(N, DCI);
8915 return performFMACombine(N, DCI);
8917 if (
SDValue Widended = widenLoad(cast<LoadSDNode>(N), DCI))
8944 return performMemSDNodeCombine(cast<MemSDNode>(N), DCI);
8946 return performAndCombine(N, DCI);
8948 return performOrCombine(N, DCI);
8950 return performXorCombine(N, DCI);
8952 return performZeroExtendCombine(N, DCI);
8954 return performClassCombine(N, DCI);
8956 return performFCanonicalizeCombine(N, DCI);
8958 return performRcpCombine(N, DCI);
8973 return performUCharToFloatCombine(N, DCI);
8978 return performCvtF32UByteNCombine(N, DCI);
8980 return performFMed3Combine(N, DCI);
8982 return performCvtPkRTZCombine(N, DCI);
8984 return performClampCombine(N, DCI);
9004 return performExtractVectorEltCombine(N, DCI);
9006 return performInsertVectorEltCombine(N, DCI);
  case AMDGPU::sub0: return 0;
  case AMDGPU::sub1: return 1;
  case AMDGPU::sub2: return 2;
  case AMDGPU::sub3: return 3;
  case AMDGPU::sub4: return 4;
  unsigned NewDmask = 0;
  // ...
  unsigned TFCLane = 0;
  // ...
  if (OldDmask == 0) {
  // ...
    TFCLane = OldBitsSet;
// ...
    if (I.getUse().getResNo() != 0)
    // ...
    if (!I->isMachineOpcode() ||
        I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
// ...
    if (UsesTFC && Lane == TFCLane) {
    // ...
      for (unsigned i = 0, Dmask = OldDmask;
           (i <= Lane) && (Dmask != 0); i++) {
        // ...
        Dmask &= ~(1 << Comp);
      // ...
      NewDmask |= 1 << Comp;
// ...
  bool NoChannels = !NewDmask;
  // ...
    if (OldBitsSet == 1)
// ...
  if (NewDmask == OldDmask)
// ...
  unsigned NewChannels = BitsSet + UsesTFC;
  // ...
  assert(NewOpcode != -1 &&
         // ...
         "failed to find equivalent MIMG op");
// ...
  MVT ResultVT = NewChannels == 1 ?
    // ...
    NewChannels == 5 ? 8 : NewChannels);
// ...
  if (NewChannels == 1) {
// ...
  for (unsigned i = 0, Idx = AMDGPU::sub0; i < 5; ++i) {
    // ...
    if (i || !NoChannels)
    // ...
    switch (Idx) {
    case AMDGPU::sub0: Idx = AMDGPU::sub1; break;
    case AMDGPU::sub1: Idx = AMDGPU::sub2; break;
    case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
    case AMDGPU::sub3: Idx = AMDGPU::sub4; break;
    }
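// adjustWritemask shrinks a MIMG instruction's dmask to the set of result
// channels actually extracted by EXTRACT_SUBREG users: unused bits are
// dropped, an equivalent opcode with the smaller return size is looked up,
// and each surviving user is remapped to the packed subregister index, with
// one extra lane kept for the TFE/LWE status word when it is in use.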
9185 return isa<FrameIndexSDNode>(
Op);
9199 if (SrcVal.getValueType() ==
MVT::i1 &&
9242 if (TII->
isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
9244 return adjustWritemask(Node, DAG);
9247 if (Opcode == AMDGPU::INSERT_SUBREG ||
9248 Opcode == AMDGPU::REG_SEQUENCE) {
9254 case AMDGPU::V_DIV_SCALE_F32:
9255 case AMDGPU::V_DIV_SCALE_F64: {
9265 (Src0 == Src1 || Src0 == Src2))
9279 if (Src0.isMachineOpcode() &&
9280 Src0.getMachineOpcode() == AMDGPU::IMPLICIT_DEF) {
9325 if (NoRetAtomicOp != -1) {
9327 MI.
setDesc(TII->get(NoRetAtomicOp));
9345 MI.
setDesc(TII->get(NoRetAtomicOp));
9353 TII->get(AMDGPU::IMPLICIT_DEF),
Def);
9402 uint64_t RsrcDword2And3)
const {
9412 RsrcDword2And3 & UINT64_C(0xFFFFFFFF));
9434 std::pair<unsigned, const TargetRegisterClass *>
9439 if (Constraint.
size() == 1) {
9440 switch (Constraint[0]) {
9447 return std::make_pair(0U,
nullptr);
9450 RC = &AMDGPU::SReg_32_XM0RegClass;
9453 RC = &AMDGPU::SGPR_64RegClass;
9456 RC = &AMDGPU::SReg_128RegClass;
9459 RC = &AMDGPU::SReg_256RegClass;
9462 RC = &AMDGPU::SReg_512RegClass;
9469 return std::make_pair(0U,
nullptr);
9472 RC = &AMDGPU::VGPR_32RegClass;
9475 RC = &AMDGPU::VReg_64RegClass;
9478 RC = &AMDGPU::VReg_96RegClass;
9481 RC = &AMDGPU::VReg_128RegClass;
9484 RC = &AMDGPU::VReg_256RegClass;
9487 RC = &AMDGPU::VReg_512RegClass;
9496 return std::make_pair(0U, RC);
9499 if (Constraint.
size() > 1) {
9500 if (Constraint[1] ==
'v') {
9501 RC = &AMDGPU::VGPR_32RegClass;
9502 }
else if (Constraint[1] ==
's') {
9503 RC = &AMDGPU::SGPR_32RegClass;
9509 if (!Failed && Idx < RC->getNumRegs())
9518 if (Constraint.
size() == 1) {
9519 switch (Constraint[0]) {
      MFI.hasVarSizedObjects() ||
// ...
                                  const APInt &DemandedElts,
                                  // ...
                                  unsigned Depth) const {
// ...
  if (TRI.isPhysicalRegister(Reg))
  // ...
  if (MRI.isLiveIn(Reg)) {
// ...
      cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
  // ...
      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
// ...
                                  unsigned Depth) const {
bool hasBCNT(unsigned Size) const
bool enableIEEEBit(const MachineFunction &MF) const
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI)
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getFrameOffsetReg() const
static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static void allocateSpecialEntryInputVGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info)
unsigned getScratchWaveOffsetReg() const
void setWorkItemIDX(ArgDescriptor Arg)
bool hasDispatchPtr() const
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
constexpr bool isUInt< 32 >(uint64_t x)
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set, or Regs.size() if they are all allocated.
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array...
static MVT getIntegerVT(unsigned BitWidth)
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
ArgDescriptor DispatchPtr
const MachineInstrBuilder & add(const MachineOperand &MO) const
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
BUILTIN_OP_END - This must be the last enum value in this list.
static void r3(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E, int I, uint32_t *Buf)
A parsed version of the target data layout string in and methods for querying it. ...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred)
getICmpCondCode - Return the ISD condition code corresponding to the given LLVM IR integer condition ...
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isLegalGlobalAddressingMode(const AddrMode &AM) const
Interface definition for SIRegisterInfo.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
void setFuncArgInfo(const Function &F, const AMDGPUFunctionArgInfo &ArgInfo)
static void r2(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E, int I, uint32_t *Buf)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const SDValue & getOffset() const
bool hasPrivateSegmentBuffer() const
unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed...
C - The default llvm calling convention, compatible with C.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
const GlobalValue * getGlobal() const
bool hasApertureRegs() const
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isDivergent(const Value *V) const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
This class represents an incoming formal argument to a Function.
Diagnostic information for unsupported feature in backend.
SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, unsigned Reg, EVT VT, const SDLoc &SL, bool RawReg=false) const
Helper function that adds Reg to the LiveIn list of the DAG's MachineFunction.
AMDGPU specific subclass of TargetSubtarget.
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
bool isPSInputAllocated(unsigned Index) const
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd)...
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
static ConstantFPSDNode * getSplatConstantFP(SDValue Op)
bool hasPrivateSegmentWaveByteOffset() const
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an vector value) starting with the ...
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
BR_CC - Conditional branch.
This class represents lattice values for constants.
bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, MachineFunction &MF, unsigned IntrinsicID) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types...
value_iterator value_end() const
Type * getParamType(unsigned i) const
Parameter type accessors.
bool isCompareAndSwap() const
Returns true if this SDNode represents cmpxchg atomic operation, false otherwise. ...
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool supportSplitCSR(MachineFunction *MF) const override
Return true if the target supports that a subset of CSRs for the given machine function is handled ex...
uint64_t getDefaultRsrcDataFormat() const
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
iterator begin() const
begin/end - Return all of the registers in this class.
static bool isBoolSGPR(SDValue V)
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
unsigned getStackPtrOffsetReg() const
bool isSized(SmallPtrSetImpl< Type *> *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
static void allocateSystemSGPRs(CCState &CCInfo, MachineFunction &MF, SIMachineFunctionInfo &Info, CallingConv::ID CallConv, bool IsShader)
2: 32-bit floating point type
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL, SDValue V) const
bool isVector() const
Return true if this is a vector value type.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
bool isAllocated(unsigned Reg) const
isAllocated - Return true if the specified register (or an alias) is allocated.
const SDValue & getBasePtr() const
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
unsigned getRegister(unsigned i) const
Return the specified register in the class.
bool isNegative() const
Return true if the value is negative.
bool hasFlatGlobalInsts() const
bool canMergeStoresTo(unsigned AS, EVT MemVT, const SelectionDAG &DAG) const override
Returns if it's reasonable to merge stores to MemVT size.
SDValue loadInputValue(SelectionDAG &DAG, const TargetRegisterClass *RC, EVT VT, const SDLoc &SL, const ArgDescriptor &Arg) const
bool supportsMinMaxDenormModes() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void push_back(const T &Elt)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Describe properties that are true of each instruction in the target description file.
unsigned getReg() const
getReg - Returns the register number.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
void setIsUndef(bool Val=true)
bool hasFmaMixInsts() const
MachineMemOperand::Flags flags
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
const SDValue & getValue() const
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI)
bool hasImplicitBufferPtr() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
SDVTList getVTList() const
This class represents a function call, abstracting a target machine's calling convention.
unsigned addWorkGroupIDY()
unsigned getSubReg() const
void setHasFloatingPointExceptions(bool FPExceptions=true)
Tells the code generator that this target supports floating point exceptions and cares about preservi...
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
const GCNSubtarget * getSubtarget() const
bool hasTrigReducedRange() const
Address space for 32-bit constant memory.
float BitsToFloat(uint32_t Bits)
This function takes a 32-bit integer and returns the bit equivalent float.
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
unsigned getVectorNumElements() const
const SDValue & getChain() const
Function Alias Analysis Results
Address space for private memory.
LLVMContext & getContext() const
All values hold a context through their type.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const SIInstrInfo * getInstrInfo() const override
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
static bool bitOpWithConstantIsReducible(unsigned Opc, uint32_t Val)
unsigned getAlignment() const
unsigned getPSInputAddr() const
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfo(unsigned Dim)
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
bool hasWorkItemIDZ() const
SDNode * legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const
Legalize target independent instructions (e.g.
static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL, EVT VT, SDValue A, SDValue B, SDValue C, SDValue GlueChain)
virtual const TargetRegisterClass * getRegClassFor(MVT VT) const
Return the register class that should be used for the specified value type.
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
bool isInteger() const
Return true if this is an integer or a vector integer type.
void markPSInputEnabled(unsigned Index)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const fltSemantics & getSemantics() const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
1: 16-bit floating point type
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
[US]{MIN/MAX} - Binary minimum or maximum or signed or unsigned integers.
const SDNodeFlags getFlags() const
MachineFunction & getMachineFunction() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isVectorTy() const
True if this is an instance of VectorType.
iv Induction Variable Users
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
LLVM_READONLY int getAtomicNoRetOp(uint16_t Opcode)
bool isByteSized() const
Return true if the bit size is a multiple of 8.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
CLAMP value between 0.0 and 1.0.
static MachineBasicBlock * emitIndirectDst(MachineInstr &MI, MachineBasicBlock &MBB, const GCNSubtarget &ST)
bool isInlineConstant(const APInt &Imm) const
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1 at the ...
bool hasFastFMAF32() const
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
bool hasDispatchID() const
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
const DebugLoc & getDebugLoc() const
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
const SIRegisterInfo & getRegisterInfo() const
static void allocateSpecialInputSGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info)
bool isAllOnesValue() const
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool hasWorkItemIDX() const
bool hasVOP3PInsts() const
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
static bool isFlatGlobalAddrSpace(unsigned AS)
unsigned getAddressSpace() const
Return the address space for the associated pointer.
bool hasFP64Denormals() const
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool hasFlatScratchInit() const
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
MachineSDNode * wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr) const
static SDValue emitRemovedIntrinsicError(SelectionDAG &DAG, const SDLoc &DL, EVT VT)
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
Value * getArgOperand(unsigned i) const
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
void setPrivateSegmentWaveByteOffset(unsigned Reg)
#define FP_DENORM_FLUSH_NONE
Calling convention used for Mesa/AMDPAL geometry shaders.
static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG)
void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx)
Sets stack object index for Dim's work group ID to ObjectIdx.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value that is congruent to Skew modulo Align.
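A quick worked example of the rounding behavior (an illustrative sketch using asserts; assumes only MathExtras.h):

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    void alignDownDemo() {
      assert(llvm::alignDown(13, 4) == 12);    // largest multiple of 4 <= 13
      assert(llvm::alignDown(13, 4, 1) == 13); // largest value <= 13 that is 1 (mod 4)
    }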
bool hasOneUse() const
Return true if there is exactly one use of this node.
void setScratchRSrcReg(unsigned Reg)
bool isBeforeLegalize() const
static unsigned findFirstFreeSGPR(CCState &CCInfo)
A description of a memory reference used in the backend.
unsigned DemoteRegister
DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg allocated to hold a pointer to ...
bool useVGPRIndexMode(bool UserEnable) const
Address space for constant memory (VTX2)
Calling convention used for Mesa/AMDPAL compute shaders.
unsigned getBytesInStackArgArea() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
bool isSGPRReg(const MachineRegisterInfo &MRI, unsigned Reg) const
Shift and rotation operations.
unsigned getNumOperands() const
Retuns the total number of operands.
static bool isGather4(const MachineInstr &MI)
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
void setHasNonSpillStackObjects(bool StackObject=true)
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
unsigned addDispatchID(const SIRegisterInfo &TRI)
bool isEntryFunction() const
static bool isMIMG(const MachineInstr &MI)
void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx)
Sets stack object index for Dim's work item ID to ObjectIdx.
void setIfReturnsVoid(bool Value)
bool enableDX10Clamp() const
bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const override
If SNaN is false, returns true if Op is known never to be any NaN; if SNaN is true, returns true if Op is known never to be a signaling NaN.
static void allocateHSAUserSGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
The returned value is undefined.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
bool isMemOpUniform(const SDNode *N) const
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
static unsigned SubIdx2Lane(unsigned Idx)
Helper function for adjustWritemask.
CopyToReg - This node has three operands: a chain, a register number to set to this value...
ArrayRef< MachineMemOperand * > memoperands() const
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
bool isIntegerTy() const
True if this is an instance of IntegerType.
op_iterator op_end() const
uint64_t getConstantOperandVal(unsigned i) const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
static ArgDescriptor allocateSGPR64Input(CCState &CCInfo)
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
InstrTy * getInstruction() const
static MVT memVTFromAggregate(Type *Ty)
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
The memory access is dereferenceable (i.e., doesn't trap).
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt) For double-word atomic operations: ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amtLo, amtHi) ValLo, ValHi, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amtLo, amtHi) These correspond to the atomicrmw instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
bool isTrapHandlerEnabled() const
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
const DataLayout & getDataLayout() const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE R Default(T Value)
unsigned reservedStackPtrOffsetReg(const MachineFunction &MF) const
This is an SDNode representing atomic operations.
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
uint64_t getNumElements() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
LocInfo getLocInfo() const
bool isSGPRClass(const TargetRegisterClass *RC) const
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
This file implements a class to represent arbitrary precision integral constant values and operations...
INLINEASM - Represents an inline asm block.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
bool isSDNodeSourceOfDivergence(const SDNode *N, FunctionLoweringInfo *FLI, LegacyDivergenceAnalysis *DA) const override
SmallVector< ISD::InputArg, 32 > Ins
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
unsigned getSizeInBits() const
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
static void reservePrivateMemoryRegs(const TargetMachine &TM, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info)
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
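As an illustration, a hypothetical helper (not part of this file) that normalizes an integer SDValue to i32 via this call; the SelectionDAG and SDLoc are assumed to be available at the call site:

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    // Sign-extends narrower values to i32, truncates wider ones, else a no-op.
    static SDValue toI32(SelectionDAG &DAG, const SDLoc &DL, SDValue V) {
      return DAG.getSExtOrTrunc(V, DL, MVT::i32);
    }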
Fast - This calling convention attempts to make calls as fast as possible (e.g. by passing things in registers).
unsigned getScalarSizeInBits() const
static void allocateSpecialInputVGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info)
Class to represent function types.
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
int64_t getSExtValue() const
Get sign extended value.
SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const
Split a vector load into 2 loads of half the vector.
Type * getType() const
All values are typed, get the type of this value.
MachineFunction & getMachineFunction() const
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
void setStackPtrOffsetReg(unsigned Reg)
void limitOccupancy(const MachineFunction &MF)
void clearKillFlags(unsigned Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
bool useFlatForGlobal() const
SDValue getRegisterMask(const uint32_t *RegMask)
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, const GCNSubtarget *Subtarget, uint32_t Align)
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
const TargetMachine & getTarget() const
Simple integer binary arithmetic operators.
SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI)
SmallVector< ISD::OutputArg, 32 > Outs
static bool vectorEltWillFoldAway(SDValue Op)
static LLVM_ATTRIBUTE_UNUSED bool isCopyFromRegOfInlineAsm(const SDNode *N)
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always beneficiates from combining into FMA for a given value type...
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc)
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue is known to never be NaN.
std::pair< SDValue, SDValue > split64BitValue(SDValue Op, SelectionDAG &DAG) const
Return 64-bit value Op as two 32-bit integers.
static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG, SDValue *GLC, SDValue *SLC)
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
op_iterator op_begin() const
static SDValue getLoadExtOrTrunc(SelectionDAG &DAG, ISD::LoadExtType ExtType, SDValue Op, const SDLoc &SL, EVT VT)
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors...
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
bool getAddrModeArguments(IntrinsicInst *, SmallVectorImpl< Value *> &, Type *&) const override
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
void insertCopiesSplitCSR(MachineBasicBlock *Entry, const SmallVectorImpl< MachineBasicBlock *> &Exits) const override
Insert explicit copies in entry and exit blocks.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
unsigned getUndefRegState(bool B)
void markPSInputAllocated(unsigned Index)
bool hasWorkGroupIDX() const
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
static SDValue emitNonHSAIntrinsicError(SelectionDAG &DAG, const SDLoc &DL, EVT VT)
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
static bool parseTexFail(SDValue TexFailCtrl, SelectionDAG &DAG, SDValue *TFE, SDValue *LWE, bool &IsTexFail)
unsigned getSrcAddressSpace() const
Value * getOperand(unsigned i) const
Class to represent pointers.
unsigned getByValSize() const
UNDEF - An undefined node.
This class is used to represent ISD::STORE nodes.
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Flag
These should be considered private to the implementation of the MCInstrDesc class.
bool hasAllowReciprocal() const
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
TargetInstrInfo - Interface to description of machine instruction set.
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
const Value * getValue() const
Return the base address of the memory access.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
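For example (a minimal sketch against APInt.h):

    #include "llvm/ADT/APInt.h"
    #include <cassert>

    void highBitsDemo() {
      // A 32-bit mask with the top 16 bits set: 0xFFFF0000.
      assert(llvm::APInt::getHighBitsSet(32, 16).getZExtValue() == 0xFFFF0000u);
    }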
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
bool hasUnalignedBufferAccess() const
11: Arbitrary bit width integers
bool hasAllowContract() const
The memory access is volatile.
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets. Return the minimum alignment that may be assumed after adding the two together.
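Concretely, the result is the largest power of two dividing both arguments, i.e. the alignment that can still be assumed for a base plus an offset (illustrative values):

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    void minAlignDemo() {
      assert(llvm::MinAlign(16, 4) == 4);   // 16-aligned base plus offset 4
      assert(llvm::MinAlign(96, 32) == 32); // both divisible by 32
    }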
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool hasFP32Denormals() const
const SDValue & getBasePtr() const
static bool fp16SrcZerosHighBits(unsigned Opc)
A switch()-like statement whose cases are string literals.
initializer< Ty > init(const Ty &Val)
int64_t getOffset() const
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
Control flow instructions. These all have token chains.
bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
MachineBasicBlock * splitKillBlock(MachineInstr &MI, MachineBasicBlock *BB) const
constexpr bool isUInt< 8 >(uint64_t x)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
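A short sketch of the common and edge cases (assumes MathExtras.h):

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    void ctzDemo() {
      assert(llvm::countTrailingZeros(8u) == 3);  // 0b1000 -> three trailing zeros
      assert(llvm::countTrailingZeros(0u) == 32); // ZB_Width: zero input yields the bit width
    }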
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
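For instance (illustrative values):

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    void powerOfTwoDemo() {
      assert(llvm::isPowerOf2_32(64));  // exactly one set bit
      assert(!llvm::isPowerOf2_32(96)); // 96 = 64 + 32, two set bits
      assert(!llvm::isPowerOf2_32(0));  // zero is excluded by definition
    }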
unsigned addWorkGroupInfo()
unsigned reservedPrivateSegmentWaveByteOffsetReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch wave offset in case spilling is needed...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed...
This is an important class for using LLVM in a threaded context.
unsigned getStackAlignment() const
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
Simple binary floating point operators.
SDValue storeStackInputValue(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, SDValue ArgVal, int64_t Offset) const
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
bool isMachineOpcode() const
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Address space for flat memory.
unsigned getScalarSizeInBits() const
size_t size() const
size - Get the array size.
SDNode * PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override
Fold the instructions after selecting them.
unsigned getReturnAddressReg(const MachineFunction &MF) const
const RsrcIntrinsic * lookupRsrcIntrinsic(unsigned Intr)
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
const SDValue & getOperand(unsigned Num) const
bool hasFPExceptions() const
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
bool hasBitPreservingFPLogic(EVT VT) const override
Return true if it is safe to transform an integer-domain bitwise operation into the equivalent floati...
std::pair< const ArgDescriptor *, const TargetRegisterClass * > getPreloadedValue(PreloadedValue Value) const
const GlobalValue * getGlobal() const
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
bool has16BitInsts() const
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const override
Returns the target specific optimal type for load and store operations as a result of memset...
bool isEntryFunctionCC(CallingConv::ID CC)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
static bool isUniformMMO(const MachineMemOperand *MMO)
bool isEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SmallVectorImpl< ISD::InputArg > &Ins, SelectionDAG &DAG) const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
virtual void computeKnownBitsForFrameIndex(const SDValue FIOp, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
const AMDGPUBufferPseudoSourceValue * getBufferPSV(const SIInstrInfo &TII, const Value *BufferRsrc)
bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const override
If SNaN is false, returns true if Op is known never to be any NaN; if SNaN is true, returns true if Op is known never to be a signaling NaN.
SI DAG Lowering interface definition.
unsigned getDestAddressSpace() const
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool mayBeEmittedAsTailCall(const CallInst *) const override
Return true if the target may be able emit the call instruction as a tail call.
static ArgDescriptor createRegister(unsigned Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This class provides iterator support for SDUse operands that use a specific SDNode.
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side...
Address space for local memory.
unsigned getMachineOpcode() const
static SDValue constructRetValue(SelectionDAG &DAG, MachineSDNode *Result, ArrayRef< EVT > ResultTypes, bool IsTexFail, bool Unpacked, bool IsD16, int DMaskPop, int NumVDataDwords, const SDLoc &DL, LLVMContext &Context)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
unsigned addQueuePtr(const SIRegisterInfo &TRI)
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
static unsigned getMOVRELDPseudo(const SIRegisterInfo &TRI, const TargetRegisterClass *VecRC)
TRAP - Trapping instruction.
bool hasWorkItemIDY() const
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
const APInt & getAPIntValue() const
Generation getGeneration() const
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline...
const Triple & getTargetTriple() const
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
DEBUGTRAP - Trap intended to get the attention of a debugger.
static SDValue getMad64_32(SelectionDAG &DAG, const SDLoc &SL, EVT VT, SDValue N0, SDValue N1, SDValue N2, bool Signed)
#define FP_DENORM_FLUSH_IN_FLUSH_OUT
SDNode * getGluedNode() const
If this node has a glue operand, return the node to which the glue operand points.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
self_iterator getIterator()
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
bool hasWorkGroupIDY() const
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y)...
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
static std::pair< unsigned, int > computeIndirectRegAndOffset(const SIRegisterInfo &TRI, const TargetRegisterClass *SuperRC, unsigned VecReg, int Offset)
AMDGPUFunctionArgInfo & getArgInfo()
The AMDGPU TargetMachine interface definition for hw codgen targets.
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
Bit counting operators with an undefined result for zero inputs.
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
static MachineBasicBlock * emitIndirectSrc(MachineInstr &MI, MachineBasicBlock &MBB, const GCNSubtarget &ST)
const Pass * getPass() const
Address space for global memory (RAT0, VTX0).
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Calling convention used for Mesa/AMDPAL pixel shaders.
void setScratchWaveOffsetReg(unsigned Reg)
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
This structure contains all information that is necessary for lowering calls.
bool hasUnalignedScratchAccess() const
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
const TargetMachine & getTargetMachine() const
This class contains a discriminated union of information about pointers in memory operands...
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
TrapHandlerAbi getTrapHandlerAbi() const
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
bool hasImplicitArgPtr() const
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value...
unsigned addDispatchPtr(const SIRegisterInfo &TRI)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
Triple - Helper class for working with autoconf configuration names.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)
Selects the correct CCAssignFn for a given CallingConvention value.
bool hasUnpackedD16VMem() const
unsigned getAddressSpace() const
unsigned getQueuePtrUserSGPR() const
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
The memory access writes data.
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
bool getScalarizeGlobalBehavior() const
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
uint64_t getNullPointerValue(unsigned AddrSpace) const
Get the integer value of a null pointer in the given address space.
static bool setM0ToIndexFromSGPR(const SIInstrInfo *TII, MachineRegisterInfo &MRI, MachineInstr &MI, int Offset, bool UseGPRIdxMode, bool IsIndirectSrc)
bool hasFlatAddressSpace() const
unsigned getWavefrontSize() const
static SDValue buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV, const SDLoc &DL, unsigned Offset, EVT PtrVT, unsigned GAFlags=SIInstrInfo::MO_NONE)
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type...
SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, MachineFrameInfo &MFI, int ClobberedFI) const
static bool isUndef(ArrayRef< int > Mask)
static ArgDescriptor allocateSGPR32InputImpl(CCState &CCInfo, const TargetRegisterClass *RC, unsigned NumArgRegs)
TokenFactor - This node takes multiple tokens as input and produces a single token result...
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override
EVT is not used in-tree, but is used by out-of-tree targets.
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
MachineSDNode * buildRSRC(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr, uint32_t RsrcDword1, uint64_t RsrcDword2And3) const
Return a resource descriptor with the 'Add TID' bit enabled. The TID (Thread ID) is multiplied by the ...
const TargetLowering & getTargetLoweringInfo() const
bool enableHugePrivateBuffer() const
Iterator for intrusive lists based on ilist_node.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
CCState - This class holds information needed while lowering arguments and return values...
void setNoUnsignedWrap(bool b)
unsigned countPopulation(T Value)
Count the number of set bits in a value.
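For example:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    void popCountDemo() {
      assert(llvm::countPopulation(0xF0F0u) == 8); // eight set bits
    }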
SDValue scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
static ArgDescriptor allocateSGPR32Input(CCState &CCInfo)
static uint32_t getPermuteMask(SelectionDAG &DAG, SDValue V)
bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation input to an Opcode operation is free (for instance, because half-precision floating-point numbers are implicitly extended to float-precision) for an FMA instruction.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
This is the shared class of boolean and integer constants.
bool hasKernargSegmentPtr() const
Returns platform specific canonical encoding of a floating point number.
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
EVT getVectorElementType() const
Given a vector type, return the type of each element.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
unsigned getLDSSize() const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue lowerUnhandledCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals, StringRef Reason) const
bool hasMadMixInsts() const
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
ADDRSPACECAST - This operator converts between pointers of different address spaces.
MachineOperand class - Representation of each machine instruction operand.
value_iterator value_begin() const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
unsigned getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses...
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
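As a sketch, a hypothetical helper (not from this file) that splats one constant into a two-element BUILD_VECTOR; the surrounding SelectionDAG is assumed:

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    // Builds (v2i32 (BUILD_VECTOR C, C)).
    static SDValue splatV2I32(SelectionDAG &DAG, const SDLoc &DL, uint64_t C) {
      SDValue Elt = DAG.getConstant(C, DL, MVT::i32);
      return DAG.getBuildVector(MVT::v2i32, DL, {Elt, Elt});
    }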
const DebugLoc & getDebugLoc() const
CCValAssign - Represent assignment of one arg/retval to a location.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
BRCOND - Conditional branch.
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values, following the IEEE-754 2008 definition.
An SDNode that represents everything that will be needed to construct a MachineInstr.
bool hasMin3Max3_16() const
Byte Swap and Counting operators.
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL, EVT VT, SDValue A, SDValue B, SDValue GlueChain)
This is an abstract virtual class for memory operations.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_READNONE bool isKernel(CallingConv::ID CC)
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static uint32_t getConstantPermuteMask(uint32_t C)
static MachineBasicBlock::iterator loadM0FromVGPR(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI, unsigned InitResultReg, unsigned PhiReg, int Offset, bool UseGPRIdxMode, bool IsIndirectSrc)
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0)
Append the extracted elements from Start to Count out of the vector Op in Args.
Represents one node in the SelectionDAG.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array...
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
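Usage is a one-liner; a hypothetical wrapper for clarity:

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    // Emits (xor V, -1), i.e. the bitwise complement of V.
    static SDValue notOf(SelectionDAG &DAG, const SDLoc &DL, SDValue V) {
      return DAG.getNOT(DL, V, V.getValueType());
    }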
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
const Function & getFunction() const
Return the LLVM function that this machine code represents.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static SDNode * findUser(SDValue Value, unsigned Opcode)
Helper function for LowerBRCOND.
void setWorkItemIDZ(ArgDescriptor Arg)
static bool allUsesHaveSourceMods(const SDNode *N, unsigned CostThreshold=4)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
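Two illustrative cases:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    void log2Demo() {
      assert(llvm::Log2_32(32) == 5); // exact power of two
      assert(llvm::Log2_32(40) == 5); // floor(log2(40))
    }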
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const
Class to represent vector types.
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT...
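A minimal sketch (the LLVMContext argument, Ctx, is assumed to come from the caller):

    #include "llvm/CodeGen/ValueTypes.h"
    using namespace llvm;

    // 4 x f16 resolves to the simple type MVT::v4f16.
    static EVT makeV4F16(LLVMContext &Ctx) {
      return EVT::getVectorVT(Ctx, MVT::f16, 4);
    }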
EVT getMemoryVT() const
Return the type of the in-memory value.
Target - Wrapper for Target specific information.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Class for arbitrary precision integers.
unsigned getByValAlign() const
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool isShader(CallingConv::ID cc)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Select(COND, TRUEVAL, FALSEVAL).
Interface for the AMDGPU Implementation of the Intrinsic Info class.
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL, ArrayRef< SDValue > Elts)
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
ANY_EXTEND - Used for integer types. The high bits are undefined.
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG)
LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch & Case(StringLiteral S, T Value)
iterator insert(iterator I, T &&Elt)
void replaceRegWith(unsigned FromReg, unsigned ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors...
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *IsFast) const override
Determine if the target supports unaligned memory accesses.
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
Assign the register class depending on the number of bits set in the writemask.
static bool isVOP3(const MachineInstr &MI)
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
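For example, bits [8, 16) of a 32-bit value:

    #include "llvm/ADT/APInt.h"
    #include <cassert>

    void bitsSetDemo() {
      assert(llvm::APInt::getBitsSet(32, 8, 16).getZExtValue() == 0xFF00u);
    }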
unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override
Returns true if a cast between SrcAS and DestAS is a noop.
const MachineBasicBlock * getParent() const
unsigned addFlatScratchInit(const SIRegisterInfo &TRI)
static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
std::pair< const ArgDescriptor *, const TargetRegisterClass * > getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const
unsigned getPSInputEnable() const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
These are IR-level optimization flags that may be propagated to SDNodes.
static SDValue buildSMovImm32(SelectionDAG &DAG, const SDLoc &DL, uint64_t Val)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array...
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
SmallVector< SDValue, 32 > OutVals
Interface definition for SIInstrInfo.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
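Illustrative cases:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    void shiftedMaskDemo() {
      assert(llvm::isShiftedMask_64(0x0FF0));  // one contiguous run of ones
      assert(!llvm::isShiftedMask_64(0x0F0F)); // two separate runs
      assert(!llvm::isShiftedMask_64(0));      // the empty sequence does not count
    }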
unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
bool isVector() const
Return true if this is a vector value type.
bool isCanonicalized(SelectionDAG &DAG, SDValue Op, unsigned MaxDepth=5) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Bitwise operators - logical and, logical or, logical xor.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
static cl::opt< unsigned > AssumeFrameIndexHighZeroBits("amdgpu-frame-index-zero-bits", cl::desc("High bits of frame index assumed to be zero"), cl::init(5), cl::ReallyHidden)
void emplace_back(ArgTypes &&... Args)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getLocMemOffset() const
LLVM_NODISCARD bool empty() const
static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(const SIInstrInfo *TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB, const DebugLoc &DL, const MachineOperand &IdxReg, unsigned InitReg, unsigned ResultReg, unsigned PhiReg, unsigned InitSaveExecReg, int Offset, bool UseGPRIdxMode, bool IsIndirectSrc)
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
static bool isClampZeroToOne(SDValue A, SDValue B)
PointerUnion< const Value *, const PseudoSourceValue * > ptrVal
SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, unsigned Opc, SDValue LHS, uint32_t ValLo, uint32_t ValHi) const
Split the 64-bit value LHS into two 32-bit components, and perform the binary operation Opc to it wit...
const Function * getParent() const
Return the enclosing method, or null if none.
static cl::opt< bool > EnableVGPRIndexMode("amdgpu-vgpr-index-mode", cl::desc("Use GPR indexing mode instead of movrel for vector indexing"), cl::init(false))
bool isShuffleMaskLegal(ArrayRef< int >, EVT) const override
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations, those with specific masks.
static ArgDescriptor createStack(unsigned Reg)
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Flags getFlags() const
Return the raw flags of the source value,.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
The memory access always returns the same value (or traps).
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
bool hasFlatInstOffsets() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isAmdHsaOrMesa(const Function &F) const
bool shouldEmitConstantsToTextSection(const Triple &TT)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const
const Value * getValueFromVirtualReg(unsigned Vreg)
This method is called from TargetLoweringInfo::isSDNodeSourceOfDivergence to get the Value correspondi...
unsigned addWorkGroupIDZ()
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
unsigned getOpcode() const
FSINCOS - Compute both fsin and fcos as a single operation.
SDValue getValue(unsigned R) const
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
bool isInfinity() const
Return true if the value is an infinity.
constexpr bool isUInt< 16 >(uint64_t x)
void setSimpleHint(unsigned VReg, unsigned PrefReg)
Specify the preferred (target independent) register allocation hint for the specified virtual registe...
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getCondCode(ISD::CondCode Cond)
static bool isFrameIndexOp(SDValue Op)
bool hasWorkGroupInfo() const
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
const MachinePointerInfo & getPointerInfo() const
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
unsigned addPrivateSegmentWaveByteOffset()
unsigned getRegister() const
bool hasFnAttribute(Attribute::AttrKind Kind) const
Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but may be faster. ...
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, unsigned Alignment=1, bool *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void insert(iterator MBBI, MachineBasicBlock *MBB)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
static bool canGuaranteeTCO(CallingConv::ID CC)
static ArgDescriptor allocateVGPR32Input(CCState &CCInfo)
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
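For example:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    void hi32Demo() {
      assert(llvm::Hi_32(0x1122334455667788ULL) == 0x11223344u);
    }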
Module * getParent()
Get the module that this global value is contained inside of...
LLVM Value Representation.
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
FMA - Perform a * b + c with no intermediate rounding step.
const AMDGPUImagePseudoSourceValue * getImagePSV(const SIInstrInfo &TII, const Value *ImgRsrc)
SDValue getRegister(unsigned Reg, EVT VT)
Address space for region memory. (GDS)
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
bool denormalsEnabledForType(EVT VT) const
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
EVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
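For instance, a hypothetical signed-max helper; compared with getSelect above, this folds the comparison and the select into a single call (the SelectionDAG is assumed to be in scope):

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    // smax(LHS, RHS): select LHS when LHS > RHS (signed), else RHS.
    static SDValue selectMax(SelectionDAG &DAG, const SDLoc &DL,
                             SDValue LHS, SDValue RHS) {
      return DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, ISD::SETGT);
    }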
bool hasNoUnsignedWrap() const
SDValue getValueType(EVT)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
static SDValue adjustLoadValueTypeImpl(SDValue Result, EVT LoadVT, const SDLoc &DL, SelectionDAG &DAG, bool Unpacked)
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
void computeKnownBitsForFrameIndex(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits of FrameIndex FIOp are known to be 0.
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone...
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT)
Primary interface to the complete machine description for the target machine.
bool hasWorkGroupIDZ() const
Type * getElementType() const
const APFloat & getValueAPF() const
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
StringRef - Represent a constant reference to a string, i.e.
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations...
SetCC operator - This evaluates to a true value iff the condition is true.
void RemoveOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
APInt bitcastToAPInt() const
void setWorkItemIDY(ArgDescriptor Arg)
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const
Split a vector store into 2 stores of half the vector.
#define LLVM_ATTRIBUTE_UNUSED
unsigned getNumOperands() const
bool isTypeDesirableForOp(unsigned Op, EVT VT) const override
Return true if the target has native support for the specified value type and it is 'desirable' to us...
const SDValue & getOperand(unsigned i) const
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
unsigned getLocReg() const
uint64_t getZExtValue() const
TRUNCATE - Completely drop the high bits.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Op, int64_t Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object...
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static void Split(std::vector< std::string > &V, StringRef S)
Splits a string of comma separated items into a vector of strings.
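Assuming this is SubtargetFeatures::Split from llvm/MC/SubtargetFeature.h, usage looks like the following; the feature names are chosen only for illustration:
std::vector<std::string> Features;
SubtargetFeatures::Split(Features, "+wavefrontsize64,-xnack");
// Features now holds {"+wavefrontsize64", "-xnack"}.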
void passSpecialInputs(CallLoweringInfo &CLI, CCState &CCInfo, const SIMachineFunctionInfo &Info, SmallVectorImpl< std::pair< unsigned, SDValue >> &RegsToPass, SmallVectorImpl< SDValue > &MemOpChains, SDValue Chain) const
SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals, bool isThisReturn, SDValue ThisVal) const
void setBytesInStackArgArea(unsigned Bytes)
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
unsigned AllocateReg(unsigned Reg)
AllocateReg - Attempt to allocate one register.
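A hedged sketch of the register-scan pattern used during argument lowering; the candidate list is a placeholder:
// Hypothetical fragment: grab the next free VGPR from a fixed candidate list.
// AllocateReg returns 0 (AMDGPU::NoRegister) once the list is exhausted.
static const MCPhysReg ArgVGPRs[] = {AMDGPU::VGPR0, AMDGPU::VGPR1,
                                     AMDGPU::VGPR2};
unsigned Reg = CCInfo.AllocateReg(ArgVGPRs); // assumes a CCState &CCInfo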
unsigned addWorkGroupIDX()
unsigned getLiveInVirtReg(unsigned PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual register.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand *> NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const MachineOperand & getOperand(unsigned i) const
SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' flag.
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary floating-point operations inspired by libm.
unsigned getMaxPrivateElementSize() const
static void processShaderInputArgs(SmallVectorImpl< ISD::InputArg > &Splits, CallingConv::ID CallConv, ArrayRef< ISD::InputArg > Ins, BitVector &Skipped, FunctionType *FType, SIMachineFunctionInfo *Info)
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
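A small sketch combining this with getTypeStoreSize above; the helper name is hypothetical:
// Hypothetical helper: an integer EVT covering a type's in-memory store size,
// e.g. for bitcasting an awkwardly sized value before a load or store.
static EVT getStoreIntVT(LLVMContext &Ctx, const DataLayout &DL, Type *Ty) {
  return EVT::getIntegerVT(Ctx, DL.getTypeStoreSize(Ty) * 8);
}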
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
unsigned AllocateStack(unsigned Size, unsigned Align)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
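A sketch shaped loosely after a custom calling-convention assignment function; the fixed 4-byte size and alignment are illustrative:
// Hypothetical fragment: reserve a 4-byte, 4-byte-aligned argument slot and
// record the resulting offset as a memory location for this value.
static bool assignToStack(unsigned ValNo, MVT ValVT, MVT LocVT,
                          CCValAssign::LocInfo LocInfo, CCState &State) {
  unsigned Offset = State.AllocateStack(4, 4);
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
  return true; // value is now assigned
}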
static IntegerType * getInt8Ty(LLVMContext &C)
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
const SDValue & getBasePtr() const
LLVMContext * getContext() const
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the resultant vector type.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
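A short sketch of materializing such a value in the DAG; the helper name is hypothetical:
// Hypothetical helper: a canonical f32 quiet NaN as a DAG constant, e.g. as
// the result of a folded fcanonicalize of a signaling NaN.
static SDValue getQNaNF32(SelectionDAG &DAG, const SDLoc &DL) {
  APFloat QNaN = APFloat::getQNaN(APFloat::IEEEsingle());
  return DAG.getConstantFP(QNaN, DL, MVT::f32);
}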
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
PointerType * getType() const
Global values are always pointers.
void initializeSplitCSR(MachineBasicBlock *Entry) const override
Perform necessary initialization to handle a subset of CSRs explicitly via copies.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
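A sketch of typical use during custom instruction insertion; the register class and source register are illustrative:
// Hypothetical fragment: make a fresh 32-bit scalar virtual register and copy
// a physical SGPR into it. Assumes MRI, TII, MBB, I, and DL are in scope, as
// inside EmitInstrWithCustomInserter.
unsigned VReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), VReg)
    .addReg(AMDGPU::SGPR0);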
ADDCARRY, SUBCARRY - Carry-using nodes for multiple precision addition and subtraction.
iterator_range< arg_iterator > args()
bool isStructTy() const
True if this is an instance of StructType.
bool empty() const
empty - Check if the array is empty.
cmpResult compare(const APFloat &RHS) const
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
const BasicBlock * getParent() const
unsigned getRegisterByName(const char *RegName, EVT VT, SelectionDAG &DAG) const override
Return the register ID of the name passed in.
bool hasCalls() const
Return true if the current function has any function calls.
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
const fltSemantics & getFltSemantics() const
bool hasFP16Denormals() const
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to use 'custom' lowering, and whose defined values are all legal.
LoadSDNode
This class is used to represent ISD::LOAD nodes.
const SIRegisterInfo * getRegisterInfo() const override