64 #define DEBUG_TYPE "nvptx-lower" 76 cl::desc(
"NVPTX Specific: FMA contraction (0: don't do it" 77 " 1: do it 2: do it aggressively"),
82 cl::desc(
"NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" 83 " IEEE Compliant F32 div.rnd if available."),
88 cl::desc(
"NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
93 cl::desc(
"NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
166 uint64_t StartingOffset = 0) {
176 Offsets->push_back(StartingOffset + 0);
177 Offsets->push_back(StartingOffset + 8);
184 if (
StructType *STy = dyn_cast<StructType>(Ty)) {
187 for(
auto *EI : STy->elements()) {
189 StartingOffset + SL->getElementOffset(ElementNum));
196 for (
unsigned i = 0, e = TempVTs.
size(); i != e; ++i) {
198 uint64_t Off = TempOffsets[i];
207 if (EltVT ==
MVT::f16 && NumElts % 2 == 0) {
211 for (
unsigned j = 0; j != NumElts; ++j) {
240 if (AccessSize > ParamAlignment)
243 if (Offsets[Idx] & (AccessSize - 1))
246 EVT EltVT = ValueVTs[Idx];
250 if (EltSize >= AccessSize)
253 unsigned NumElts = AccessSize / EltSize;
255 if (AccessSize != EltSize * NumElts)
259 if (Idx + NumElts > ValueVTs.
size())
263 if (NumElts != 4 && NumElts != 2)
266 for (
unsigned j = Idx + 1; j < Idx + NumElts; ++j) {
268 if (ValueVTs[j] != EltVT)
272 if (Offsets[j] - Offsets[j - 1] != EltSize)
300 unsigned ParamAlignment) {
307 for (
int I = 0,
E = ValueVTs.
size();
I !=
E; ++
I) {
310 for (
unsigned AccessSize : {16, 8, 4, 2}) {
312 I, AccessSize, ValueVTs, Offsets, ParamAlignment);
321 assert(
I + 1 <
E &&
"Not enough elements.");
327 assert(
I + 3 <
E &&
"Not enough elements.");
591 return "NVPTXISD::CALL";
593 return "NVPTXISD::RET_FLAG";
595 return "NVPTXISD::LOAD_PARAM";
597 return "NVPTXISD::Wrapper";
599 return "NVPTXISD::DeclareParam";
601 return "NVPTXISD::DeclareScalarParam";
603 return "NVPTXISD::DeclareRet";
605 return "NVPTXISD::DeclareScalarRet";
607 return "NVPTXISD::DeclareRetParam";
609 return "NVPTXISD::PrintCall";
611 return "NVPTXISD::PrintConvergentCall";
613 return "NVPTXISD::PrintCallUni";
615 return "NVPTXISD::PrintConvergentCallUni";
617 return "NVPTXISD::LoadParam";
619 return "NVPTXISD::LoadParamV2";
621 return "NVPTXISD::LoadParamV4";
623 return "NVPTXISD::StoreParam";
625 return "NVPTXISD::StoreParamV2";
627 return "NVPTXISD::StoreParamV4";
629 return "NVPTXISD::StoreParamS32";
631 return "NVPTXISD::StoreParamU32";
633 return "NVPTXISD::CallArgBegin";
635 return "NVPTXISD::CallArg";
637 return "NVPTXISD::LastCallArg";
639 return "NVPTXISD::CallArgEnd";
641 return "NVPTXISD::CallVoid";
643 return "NVPTXISD::CallVal";
645 return "NVPTXISD::CallSymbol";
647 return "NVPTXISD::Prototype";
649 return "NVPTXISD::MoveParam";
651 return "NVPTXISD::StoreRetval";
653 return "NVPTXISD::StoreRetvalV2";
655 return "NVPTXISD::StoreRetvalV4";
657 return "NVPTXISD::PseudoUseParam";
659 return "NVPTXISD::RETURN";
661 return "NVPTXISD::CallSeqBegin";
663 return "NVPTXISD::CallSeqEnd";
665 return "NVPTXISD::CallPrototype";
667 return "NVPTXISD::ProxyReg";
669 return "NVPTXISD::LoadV2";
671 return "NVPTXISD::LoadV4";
673 return "NVPTXISD::LDGV2";
675 return "NVPTXISD::LDGV4";
677 return "NVPTXISD::LDUV2";
679 return "NVPTXISD::LDUV4";
681 return "NVPTXISD::StoreV2";
683 return "NVPTXISD::StoreV4";
685 return "NVPTXISD::FUN_SHFL_CLAMP";
687 return "NVPTXISD::FUN_SHFR_CLAMP";
689 return "NVPTXISD::IMAD";
691 return "NVPTXISD::SETP_F16X2";
693 return "NVPTXISD::Dummy";
695 return "NVPTXISD::MUL_WIDE_SIGNED";
697 return "NVPTXISD::MUL_WIDE_UNSIGNED";
701 return "NVPTXISD::Tex1DFloatFloatLevel";
703 return "NVPTXISD::Tex1DFloatFloatGrad";
707 return "NVPTXISD::Tex1DS32FloatLevel";
709 return "NVPTXISD::Tex1DS32FloatGrad";
713 return "NVPTXISD::Tex1DU32FloatLevel";
715 return "NVPTXISD::Tex1DU32FloatGrad";
719 return "NVPTXISD::Tex1DArrayFloatFloatLevel";
721 return "NVPTXISD::Tex1DArrayFloatFloatGrad";
725 return "NVPTXISD::Tex1DArrayS32FloatLevel";
727 return "NVPTXISD::Tex1DArrayS32FloatGrad";
731 return "NVPTXISD::Tex1DArrayU32FloatLevel";
733 return "NVPTXISD::Tex1DArrayU32FloatGrad";
737 return "NVPTXISD::Tex2DFloatFloatLevel";
739 return "NVPTXISD::Tex2DFloatFloatGrad";
743 return "NVPTXISD::Tex2DS32FloatLevel";
745 return "NVPTXISD::Tex2DS32FloatGrad";
749 return "NVPTXISD::Tex2DU32FloatLevel";
751 return "NVPTXISD::Tex2DU32FloatGrad";
755 return "NVPTXISD::Tex2DArrayFloatFloatLevel";
757 return "NVPTXISD::Tex2DArrayFloatFloatGrad";
761 return "NVPTXISD::Tex2DArrayS32FloatLevel";
763 return "NVPTXISD::Tex2DArrayS32FloatGrad";
767 return "NVPTXISD::Tex2DArrayU32FloatLevel";
769 return "NVPTXISD::Tex2DArrayU32FloatGrad";
773 return "NVPTXISD::Tex3DFloatFloatLevel";
775 return "NVPTXISD::Tex3DFloatFloatGrad";
779 return "NVPTXISD::Tex3DS32FloatLevel";
781 return "NVPTXISD::Tex3DS32FloatGrad";
785 return "NVPTXISD::Tex3DU32FloatLevel";
787 return "NVPTXISD::Tex3DU32FloatGrad";
790 return "NVPTXISD::TexCubeFloatFloatLevel";
793 return "NVPTXISD::TexCubeS32FloatLevel";
796 return "NVPTXISD::TexCubeU32FloatLevel";
798 return "NVPTXISD::TexCubeArrayFloatFloat";
800 return "NVPTXISD::TexCubeArrayFloatFloatLevel";
802 return "NVPTXISD::TexCubeArrayS32Float";
804 return "NVPTXISD::TexCubeArrayS32FloatLevel";
806 return "NVPTXISD::TexCubeArrayU32Float";
808 return "NVPTXISD::TexCubeArrayU32FloatLevel";
810 return "NVPTXISD::Tld4R2DFloatFloat";
812 return "NVPTXISD::Tld4G2DFloatFloat";
814 return "NVPTXISD::Tld4B2DFloatFloat";
816 return "NVPTXISD::Tld4A2DFloatFloat";
818 return "NVPTXISD::Tld4R2DS64Float";
820 return "NVPTXISD::Tld4G2DS64Float";
822 return "NVPTXISD::Tld4B2DS64Float";
824 return "NVPTXISD::Tld4A2DS64Float";
826 return "NVPTXISD::Tld4R2DU64Float";
828 return "NVPTXISD::Tld4G2DU64Float";
830 return "NVPTXISD::Tld4B2DU64Float";
832 return "NVPTXISD::Tld4A2DU64Float";
835 return "NVPTXISD::TexUnified1DFloatS32";
837 return "NVPTXISD::TexUnified1DFloatFloat";
839 return "NVPTXISD::TexUnified1DFloatFloatLevel";
841 return "NVPTXISD::TexUnified1DFloatFloatGrad";
843 return "NVPTXISD::TexUnified1DS32S32";
845 return "NVPTXISD::TexUnified1DS32Float";
847 return "NVPTXISD::TexUnified1DS32FloatLevel";
849 return "NVPTXISD::TexUnified1DS32FloatGrad";
851 return "NVPTXISD::TexUnified1DU32S32";
853 return "NVPTXISD::TexUnified1DU32Float";
855 return "NVPTXISD::TexUnified1DU32FloatLevel";
857 return "NVPTXISD::TexUnified1DU32FloatGrad";
859 return "NVPTXISD::TexUnified1DArrayFloatS32";
861 return "NVPTXISD::TexUnified1DArrayFloatFloat";
863 return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
865 return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
867 return "NVPTXISD::TexUnified1DArrayS32S32";
869 return "NVPTXISD::TexUnified1DArrayS32Float";
871 return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
873 return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
875 return "NVPTXISD::TexUnified1DArrayU32S32";
877 return "NVPTXISD::TexUnified1DArrayU32Float";
879 return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
881 return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
883 return "NVPTXISD::TexUnified2DFloatS32";
885 return "NVPTXISD::TexUnified2DFloatFloat";
887 return "NVPTXISD::TexUnified2DFloatFloatLevel";
889 return "NVPTXISD::TexUnified2DFloatFloatGrad";
891 return "NVPTXISD::TexUnified2DS32S32";
893 return "NVPTXISD::TexUnified2DS32Float";
895 return "NVPTXISD::TexUnified2DS32FloatLevel";
897 return "NVPTXISD::TexUnified2DS32FloatGrad";
899 return "NVPTXISD::TexUnified2DU32S32";
901 return "NVPTXISD::TexUnified2DU32Float";
903 return "NVPTXISD::TexUnified2DU32FloatLevel";
905 return "NVPTXISD::TexUnified2DU32FloatGrad";
907 return "NVPTXISD::TexUnified2DArrayFloatS32";
909 return "NVPTXISD::TexUnified2DArrayFloatFloat";
911 return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
913 return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
915 return "NVPTXISD::TexUnified2DArrayS32S32";
917 return "NVPTXISD::TexUnified2DArrayS32Float";
919 return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
921 return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
923 return "NVPTXISD::TexUnified2DArrayU32S32";
925 return "NVPTXISD::TexUnified2DArrayU32Float";
927 return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
929 return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
931 return "NVPTXISD::TexUnified3DFloatS32";
933 return "NVPTXISD::TexUnified3DFloatFloat";
935 return "NVPTXISD::TexUnified3DFloatFloatLevel";
937 return "NVPTXISD::TexUnified3DFloatFloatGrad";
939 return "NVPTXISD::TexUnified3DS32S32";
941 return "NVPTXISD::TexUnified3DS32Float";
943 return "NVPTXISD::TexUnified3DS32FloatLevel";
945 return "NVPTXISD::TexUnified3DS32FloatGrad";
947 return "NVPTXISD::TexUnified3DU32S32";
949 return "NVPTXISD::TexUnified3DU32Float";
951 return "NVPTXISD::TexUnified3DU32FloatLevel";
953 return "NVPTXISD::TexUnified3DU32FloatGrad";
955 return "NVPTXISD::TexUnifiedCubeFloatFloat";
957 return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
959 return "NVPTXISD::TexUnifiedCubeS32Float";
961 return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
963 return "NVPTXISD::TexUnifiedCubeU32Float";
965 return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
967 return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
969 return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
971 return "NVPTXISD::TexUnifiedCubeArrayS32Float";
973 return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
975 return "NVPTXISD::TexUnifiedCubeArrayU32Float";
977 return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
979 return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
981 return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
983 return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
985 return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
987 return "NVPTXISD::Tld4UnifiedR2DS64Float";
989 return "NVPTXISD::Tld4UnifiedG2DS64Float";
991 return "NVPTXISD::Tld4UnifiedB2DS64Float";
993 return "NVPTXISD::Tld4UnifiedA2DS64Float";
995 return "NVPTXISD::Tld4UnifiedR2DU64Float";
997 return "NVPTXISD::Tld4UnifiedG2DU64Float";
999 return "NVPTXISD::Tld4UnifiedB2DU64Float";
1001 return "NVPTXISD::Tld4UnifiedA2DU64Float";
1198 bool Reciprocal)
const {
1200 (Enabled == ReciprocalEstimate::Unspecified && !
usePrecSqrtF32())))
1203 if (ExtraSteps == ReciprocalEstimate::Unspecified)
1219 if (Reciprocal || ExtraSteps > 0) {
1260 assert(isABI &&
"Non-ABI compilation is not supported");
1264 std::stringstream
O;
1273 if (
auto *ITy = dyn_cast<IntegerType>(retTy)) {
1274 size = ITy->getBitWidth();
1277 "Floating point type expected here");
1286 O <<
".param .b" << size <<
" _";
1287 }
else if (isa<PointerType>(retTy)) {
1288 O <<
".param .b" << PtrVT.getSizeInBits() <<
" _";
1291 O <<
".param .align " << retAlignment <<
" .b8 _[" 1303 for (
unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
1304 Type *Ty = Args[i].Ty;
1310 if (!Outs[OIdx].Flags.isByVal()) {
1315 if (!
getAlign(*CallI, i + 1, align))
1318 O <<
".param .align " << align <<
" .b8 ";
1320 O <<
"[" << sz <<
"]";
1324 if (
unsigned len = vtparts.
size())
1331 "type mismatch between callee prototype and arguments");
1334 if (isa<IntegerType>(Ty)) {
1338 }
else if (isa<PointerType>(Ty)) {
1339 sz = PtrVT.getSizeInBits();
1347 O <<
".param .b" << sz <<
" ";
1352 assert(PTy &&
"Param with byval attribute should be a pointer type");
1353 Type *ETy = PTy->getElementType();
1355 unsigned align = Outs[OIdx].Flags.getByValAlign();
1357 O <<
".param .align " << align <<
" .b8 ";
1359 O <<
"[" << sz <<
"]";
1365 unsigned NVPTXTargetLowering::getArgumentAlignment(
SDValue Callee,
1367 Type *Ty,
unsigned Idx,
1377 if (!DirectCallee) {
1381 assert(CalleeI &&
"Call target is not a function or derived value?");
1384 if (isa<CallInst>(CalleeI)) {
1386 if (
getAlign(*cast<CallInst>(CalleeI), Idx, Align))
1389 const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
1391 while (isa<ConstantExpr>(CalleeV)) {
1396 CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0);
1401 if (isa<Function>(CalleeV))
1402 DirectCallee = CalleeV;
1409 if (
getAlign(*cast<Function>(DirectCallee), Idx, Align))
1433 assert(isABI &&
"Non-ABI compilation is not supported");
1441 unsigned paramCount = 0;
1454 for (
unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
1455 EVT VT = Outs[OIdx].VT;
1456 Type *Ty = Args[i].Ty;
1458 if (!Outs[OIdx].Flags.isByVal()) {
1463 getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL);
1484 SDValue DeclareScalarParamOps[] = {
1489 DeclareScalarParamOps);
1498 bool ExtendIntegerParam =
1503 for (
unsigned j = 0, je = VTs.
size(); j != je; ++j) {
1506 assert(StoreOperands.
empty() &&
"Unfinished preceeding store.");
1513 SDValue StVal = OutVals[OIdx];
1514 if (ExtendIntegerParam) {
1515 assert(VTs.
size() == 1 &&
"Scalar can't have multiple parts.");
1530 unsigned NumElts = StoreOperands.
size() - 3;
1550 EVT TheStoreType = ExtendIntegerParam ?
MVT::i32 : VTs[j];
1561 StoreOperands.
clear();
1565 assert(StoreOperands.
empty() &&
"Unfinished parameter store.");
1576 assert(PTy &&
"Type of a byval parameter should be pointer");
1580 unsigned sz = Outs[OIdx].Flags.getByValSize();
1582 unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
1598 for (
unsigned j = 0, je = VTs.
size(); j != je; ++j) {
1599 EVT elemtype = VTs[j];
1600 int curOffset = Offsets[j];
1611 SDValue CopyParamOps[] = { Chain,
1616 CopyParamOps, elemtype,
1626 unsigned retAlignment = 0;
1629 if (Ins.
size() > 0) {
1654 retAlignment = getArgumentAlignment(Callee, CS, RetTy, 0, DL);
1656 SDValue DeclareRetOps[] = { Chain,
1669 bool isIndirectCall = !Func && CS;
1671 if (isa<ExternalSymbolSDNode>(Callee)) {
1676 assert(CalleeFunc !=
nullptr &&
"Libcall callee must be set.");
1680 CalleeFunc->
addFnAttr(
"nvptx-libcall-callee",
"true");
1683 if (isIndirectCall) {
1692 std::string Proto =
getPrototype(DL, RetTy, Args, Outs, retAlignment, CS);
1693 const char *ProtoStr =
1711 Chain = DAG.
getNode(Opcode, dl, PrintCallVTs, PrintCallOps);
1716 SDValue CallVoidOps[] = { Chain, Callee, InFlag };
1722 SDValue CallArgBeginOps[] = { Chain, InFlag };
1727 for (
unsigned i = 0, e = paramCount; i != e; ++i) {
1736 Chain = DAG.
getNode(opcode, dl, CallArgVTs, CallArgOps);
1740 SDValue CallArgEndOps[] = { Chain,
1746 if (isIndirectCall) {
1748 SDValue PrototypeOps[] = { Chain,
1759 if (Ins.
size() > 0) {
1765 unsigned RetAlign = getArgumentAlignment(Callee, CS, RetTy, 0, DL);
1774 bool ExtendIntegerRetVal =
1777 for (
unsigned i = 0, e = VTs.
size(); i != e; ++i) {
1778 bool needTruncate =
false;
1779 EVT TheLoadType = VTs[i];
1780 EVT EltType = Ins[i].VT;
1782 if (ExtendIntegerRetVal) {
1785 needTruncate =
true;
1787 if (VTs[i].isInteger())
1788 needTruncate =
true;
1794 assert(VecIdx == -1 && LoadVTs.
empty() &&
"Orphaned operand list.");
1801 unsigned NumElts = LoadVTs.
size();
1823 Op, dl, DAG.
getVTList(LoadVTs), LoadOperands, TheLoadType,
1827 for (
unsigned j = 0; j < NumElts; ++j) {
1837 InFlag = RetVal.
getValue(NumElts + 1);
1857 for (
unsigned i = 0; i < ProxyRegOps.
size(); ++i) {
1861 { Chain, ProxyRegOps[i], InFlag }
1867 if (ProxyRegTruncates[i].hasValue()) {
1889 for (
unsigned i = 0; i < NumOperands; ++i) {
1894 for (
unsigned j = 0; j < NumSubElem; ++j) {
1921 cast<ConstantFPSDNode>(Op->
getOperand(0))->getValueAPF().bitcastToAPInt();
1923 cast<ConstantFPSDNode>(Op->
getOperand(1))->getValueAPF().bitcastToAPInt();
1933 if (isa<ConstantSDNode>(Index.
getNode()))
2084 return LowerBUILD_VECTOR(Op, DAG);
2088 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2090 return LowerCONCAT_VECTORS(Op, DAG);
2092 return LowerSTORE(Op, DAG);
2094 return LowerLOAD(Op, DAG);
2096 return LowerShiftLeftParts(Op, DAG);
2099 return LowerShiftRightParts(Op, DAG);
2101 return LowerSelect(Op, DAG);
2125 return LowerLOADi1(Op, DAG);
2153 "Custom lowering for i1 load only");
2170 return LowerSTOREi1(Op, DAG);
2180 return LowerSTOREVector(Op, DAG);
2222 unsigned PrefAlign =
2224 if (Align < PrefAlign) {
2233 unsigned Opcode = 0;
2240 bool NeedExt =
false;
2244 bool StoreF16x2 =
false;
2272 for (
unsigned i = 0; i < NumElts; ++i) {
2282 for (
unsigned i = 0; i < NumElts; ++i) {
2325 NVPTXTargetLowering::getParamSymbol(
SelectionDAG &DAG,
int idx,
EVT v)
const {
2326 std::string ParamSym;
2332 std::string *SavedStr =
2340 static const char *
const specialTypes[] = {
"struct._image2d_t",
2341 "struct._image3d_t",
2342 "struct._sampler_t" };
2354 if (!STy || STy->isLiteral())
2358 STy->getName()) !=
std::end(specialTypes);
2374 std::vector<SDValue> OutChains;
2377 assert(isABI &&
"Non-ABI compilation is not supported");
2381 std::vector<Type *> argTypes;
2382 std::vector<const Argument *> theArgs;
2384 theArgs.push_back(&
I);
2385 argTypes.push_back(
I.getType());
2396 unsigned InsIdx = 0;
2399 for (
unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
2400 Type *Ty = argTypes[i];
2410 "Only kernels can have image/sampler params");
2415 if (theArgs[i]->use_empty()) {
2421 assert(vtparts.
size() > 0 &&
"empty aggregate type not expected");
2422 for (
unsigned parti = 0, parte = vtparts.
size(); parti != parte;
2427 if (vtparts.
size() > 0)
2434 for (
unsigned parti = 0; parti < NumRegs; ++parti) {
2451 bool aggregateIsPacked =
false;
2452 if (
StructType *STy = dyn_cast<StructType>(Ty))
2453 aggregateIsPacked = STy->isPacked();
2458 assert(VTs.
size() > 0 &&
"Unexpected empty type.");
2462 SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
2464 for (
unsigned parti = 0, parte = VTs.
size(); parti != parte; ++parti) {
2466 assert(VecIdx == -1 &&
"Orphaned vector.");
2471 if (VectorInfo[parti] &
PVF_LAST) {
2472 unsigned NumElts = parti - VecIdx + 1;
2473 EVT EltVT = VTs[parti];
2491 DAG.
getLoad(VecVT, dl, Root, VecAddr,
2497 for (
unsigned j = 0; j < NumElts; ++j) {
2508 if (Ins[InsIdx].VT.isInteger() &&
2512 Elt = DAG.
getNode(Extend, dl, Ins[InsIdx].VT, Elt);
2535 assert(ObjectVT == Ins[InsIdx].VT &&
2536 "Ins type did not match function type");
2537 SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
2552 if (!OutChains.empty())
2568 assert(isABI &&
"Non-ABI compilation is not supported");
2576 assert(VTs.
size() == OutVals.
size() &&
"Bad return value decomposition");
2584 bool ExtendIntegerRetVal =
2588 for (
unsigned i = 0, e = VTs.
size(); i != e; ++i) {
2591 assert(StoreOperands.
empty() &&
"Orphaned operand list.");
2597 if (ExtendIntegerRetVal) {
2613 unsigned NumElts = StoreOperands.
size() - 2;
2630 EVT TheStoreType = ExtendIntegerRetVal ?
MVT::i32 : VTs[i];
2632 StoreOperands, TheStoreType,
2636 StoreOperands.
clear();
2644 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
2646 if (Constraint.length() > 1)
2653 switch (Intrinsic) {
3012 switch (Intrinsic) {
3356 switch (Intrinsic) {
3990 if (Constraint.
size() == 1) {
3991 switch (Constraint[0]) {
4009 std::pair<unsigned, const TargetRegisterClass *>
4013 if (Constraint.
size() == 1) {
4014 switch (Constraint[0]) {
4016 return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
4018 return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
4020 return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
4022 return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
4025 return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
4027 return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
4029 return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
4119 int nonAddCount = 0;
4137 if (orderNo - orderNo2 < 500)
4142 bool opIsLive =
false;
4146 if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
4153 if (orderNo3 > orderNo) {
4163 if (orderNo3 > orderNo) {
4209 if (isa<ConstantSDNode>(Val)) {
4233 if (MaskVal != 0xff) {
4260 if (AExt.
getNode() !=
nullptr) {
4293 if (U->getOpcode() == DivOpc && U->getOperand(0) == Num &&
4298 DAG.
getNode(DivOpc, DL, VT, Num, Den),
4354 IsSigned = (LHSSign ==
Signed);
4358 const APInt &Val = CI->getAPIntValue();
4360 return Val.
isIntN(OptSize);
4369 return LHSSign == RHSSign;
4391 if (isa<ConstantSDNode>(LHS)) {
4405 if (ShiftAmt.
sge(0) && ShiftAmt.
slt(BitWidth)) {
4406 APInt MulVal =
APInt(BitWidth, 1) << ShiftAmt;
4440 return DCI.
DAG.
getNode(Opc, DL, MulType, TruncLHS, TruncRHS);
4491 DAGCombinerInfo &DCI)
const {
4549 unsigned PrefAlign =
4551 if (Align < PrefAlign) {
4566 bool NeedTrunc =
false;
4572 unsigned Opcode = 0;
4574 bool LoadF16x2 =
false;
4618 for (
unsigned i = 0; i < NumElts; ++i) {
4628 for (
unsigned i = 0; i < NumElts; ++i) {
4651 unsigned IntrinNo = cast<ConstantSDNode>(Intrin.
getNode())->getZExtValue();
4673 bool NeedTrunc =
false;
4679 unsigned Opcode = 0;
4740 for (
unsigned i = 0; i < NumElts; ++i) {
4758 "Custom handling of non-i8 ldu/ldg?");
4775 NewLD.getValue(0)));
4782 void NVPTXTargetLowering::ReplaceNodeResults(
4801 return getDataSection();
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Instances of this class represent a uniqued identifier for a section in the current translation unit...
A parsed version of the target data layout string, together with methods for querying it. ...
const_iterator end(StringRef path)
Get end iterator over path.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
EVT getValueType() const
Return the ValueType of the referenced return value.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand...
const GlobalValue * getGlobal() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
This class represents an incoming formal argument to a Function.
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd)...
static unsigned CanMergeParamLoadStoresStartingAt(unsigned Idx, uint32_t AccessSize, const SmallVectorImpl< EVT > &ValueVTs, const SmallVectorImpl< uint64_t > &Offsets, unsigned ParamAlignment)
uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B)
Return the greatest common divisor of the values using Euclid's algorithm.
bool getAlign(const Function &F, unsigned index, unsigned &align)
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (a vector value) starting with the ...
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
BR_CC - Conditional branch.
This class represents lattice values for constants.
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0...
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
A Module instance is used to store all the information related to an LLVM module. ...
bool isSized(SmallPtrSetImpl< Type *> *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
unsigned getIROrder() const
Return the node ordering.
const SDValue & getBasePtr() const
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void push_back(const T &Elt)
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
APInt zext(unsigned width) const
Zero extend to a new width.
MachineMemOperand::Flags flags
bool slt(const APInt &RHS) const
Signed less than comparison.
const SDValue & getValue() const
This class represents a function call, abstracting a target machine's calling convention.
std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &, const SmallVectorImpl< ISD::OutputArg > &, unsigned retAlignment, ImmutableCallSite CS) const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
static bool IsMulWideOperandDemotable(SDValue Op, unsigned OptSize, OperandSignedness &S)
IsMulWideOperandDemotable - Checks if the provided DAG node is an operand that can be demoted to OptS...
unsigned getVectorNumElements() const
const SDValue & getChain() const
static bool isImageOrSamplerVal(const Value *arg, const Module *context)
Function Alias Analysis Results
bool useF32FTZ(const MachineFunction &MF) const
unsigned getAlignment() const
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
unsigned const TargetRegisterInfo * TRI
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target...
bool isInteger() const
Return true if this is an integer or a vector integer type.
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags=0)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isVectorTy() const
True if this is an instance of VectorType.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &dl, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MCSection * SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const override
Value * getArgOperand(unsigned i) const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
static SDValue PerformREMCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOpt::Level OptLevel)
static unsigned int uniqueCallSite
static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, unsigned OptSize, bool &IsSigned)
AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can be demoted to OptSize bits...
bool hasOneUse() const
Return true if there is exactly one use of this node.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
static cl::opt< bool > UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden, cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), cl::init(true))
TypeID getTypeID() const
Return the type id for the type.
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
APInt shl(unsigned shiftAmt) const
Left-shift function.
Shift and rotation operations.
static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputePTXValueVTs - For the given Type Ty, returns the set of primitive EVTs that compose it...
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Class to represent struct types.
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
bool isIntegerTy() const
True if this is an instance of IntegerType.
op_iterator op_end() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
This file contains the simple types necessary to represent the attributes associated with functions a...
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
InstrTy * getInstruction() const
void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth)
Tells the code generator which bitwidths to bypass.
The memory access is dereferenceable (i.e., doesn't trap).
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
const DataLayout & getDataLayout() const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG...
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results)
ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
This file implements a class to represent arbitrary precision integral constant values and operations...
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< ISD::InputArg, 32 > Ins
int getDivF32Level() const
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
void assign(size_type NumElts, const T &Elt)
bool usePrecSqrtF32() const
A constant value that is initialized with an expression using other constant values.
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
bool isKernelFunction(const Function &F)
Type * getType() const
All values are typed, get the type of this value.
MachineFunction & getMachineFunction() const
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Simple integer binary arithmetic operators.
ManagedStringPool * getManagedStrPool() const
SmallVector< ISD::OutputArg, 32 > Outs
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array...
op_iterator op_begin() const
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors...
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const NVPTXSubtarget &Subtarget, CodeGenOpt::Level OptLevel)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
void setIROrder(unsigned Order)
Set the node ordering.
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
amdgpu Simplify well known AMD library false Value * Callee
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Analysis containing CSE Info
Class to represent pointers.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
UNDEF - An undefined node.
This class is used to represent ISD::STORE nodes.
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const NVPTXSubtarget &Subtarget, CodeGenOpt::Level OptLevel)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1...
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
AttributeSet getAttributes(unsigned Index) const
The attributes for the specified index are returned.
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const
const SDValue & getBasePtr() const
initializer< Ty > init(const Ty &Val)
Type * getReturnType() const
Returns the type of the ret val.
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
The instances of the Type class are immutable: once they are created, they are never changed...
static cl::opt< unsigned > FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: FMA contraction (0: don't do it" " 1: do it 2: do it aggressively"), cl::init(2))
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
static cl::opt< bool > sched4reg("nvptx-sched4reg", cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false))
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
Simple binary floating point operators.
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const SDValue & getOperand(unsigned Num) const
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
const NVPTXTargetLowering * getTargetLowering() const override
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
static unsigned getOpcForSurfaceInstr(unsigned Intrinsic)
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
This class provides iterator support for SDUse operands that use a specific SDNode.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
TRAP - Trapping instruction.
const APInt & getAPIntValue() const
SectionKind - This is a simple POD value that classifies the properties of a section.
static mvt_range vector_valuetypes()
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results)
std::vector< ArgListEntry > ArgListTy
bool allowUnsafeFPMath(MachineFunction &MF) const
static SDValue PerformSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOpt::Level OptLevel)
PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
This structure contains all information that is necessary for lowering calls.
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
const TargetMachine & getTargetMachine() const
static cl::opt< int > UsePrecDivF32("nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" " IEEE Compliant F32 div.rnd if available."), cl::init(2))
This class contains a discriminated union of information about pointers in memory operands...
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
static bool IsPTXVectorType(MVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
unsigned getAddressSpace() const
The memory access writes data.
static SDValue TryMULWIDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply of M/2 bits that produces...
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
TokenFactor - This node takes multiple tokens as input and produces a single token result...
const TargetLowering & getTargetLoweringInfo() const
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
std::string * getManagedString(const char *S)
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &ExtraSteps, bool &UseOneConst, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
EVT getVectorElementType() const
Given a vector type, return the type of each element.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Module.h This file contains the declarations for the Module class.
NVPTXTargetLowering(const NVPTXTargetMachine &TM, const NVPTXSubtarget &STI)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
bool isAggregateType() const
Return true if the type is an aggregate type.
Byte Swap and Counting operators.
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
This is an abstract virtual class for memory operations.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const Function & getFunction() const
Return the LLVM function that this machine code represents.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static mvt_range integer_valuetypes()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT...
EVT getMemoryVT() const
Return the type of the in-memory value.
Class for arbitrary precision integers.
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOpt::Level OptLevel)
PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
iterator_range< use_iterator > uses()
Select(COND, TRUEVAL, FALSEVAL).
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
ANY_EXTEND - Used for integer types. The high bits are undefined.
static cl::opt< bool > FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), cl::init(false))
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors...
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
amdgpu Simplify well known AMD library false Value Value * Arg
The memory access reads data.
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
BR_JT - Jumptable branch.
SmallVector< SDValue, 32 > OutVals
bool isVector() const
Return true if this is a vector value type.
Bitwise operators - logical and, logical or, logical xor.
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
LLVM_NODISCARD bool empty() const
StringRef getValueAsString() const
Return the attribute's value as a string.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static SDValue PerformSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
PointerUnion< const Value *, const PseudoSourceValue * > ptrVal
Establish a view to a call site for examination.
Flags getFlags() const
Return the raw flags of the source value.
The memory access always returns the same value (or traps).
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
unsigned getOpcode() const
SDValue getValue(unsigned R) const
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
static SmallVector< ParamVectorizationFlags, 16 > VectorizePTXValueVTs(const SmallVectorImpl< EVT > &ValueVTs, const SmallVectorImpl< uint64_t > &Offsets, unsigned ParamAlignment)
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
unsigned getNumRegisters(LLVMContext &Context, EVT VT) const
Return the number of registers that this ValueType will eventually require.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
const MachinePointerInfo & getPointerInfo() const
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, unsigned Alignment=1, bool *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
A raw_ostream that writes to an std::string.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
SDValue getSymbolFunctionGlobalAddress(SDValue Op, Function **TargetFunction=nullptr)
Returns a GlobalAddress of the function from the current module with name matching the given External...
Module * getParent()
Get the module that this global value is contained inside of...
LLVM Value Representation.
FMA - Perform a * b + c with no intermediate rounding step.
bool allowFP16Math() const
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
unsigned int getSmVersion() const
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
static const Function * getParent(const Value *V)
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
bool isCast() const
Return true if this is a convert constant expression.
Primary interface to the complete machine description for the target machine.
uint64_t getTypeAllocSizeInBits(Type *Ty) const
Returns the offset in bits between successive objects of the specified type, including alignment padd...
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
~NVPTXTargetObjectFile() override
StringRef - Represent a constant reference to a string, i.e.
SetCC operator - This evaluates to a true value iff the condition is true.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
unsigned getNumOperands() const
const SDValue & getOperand(unsigned i) const
uint64_t getZExtValue() const
TRUNCATE - Completely drop the high bits.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
Perform various unary floating-point operations inspired by libm.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
LLVMContext * getContext() const
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static unsigned getOpcForTextureInstr(unsigned Intrinsic)
This file describes how to lower LLVM code to machine code.
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
const NVPTXRegisterInfo * getRegisterInfo() const override
This class is used to represent ISD::LOAD nodes.
const NVPTXTargetMachine * nvTM