28 #define DEBUG_TYPE "nvptx-isel" 40 doMulWide = (OptLevel > 0);
48 int NVPTXDAGToDAGISel::getDivF32Level()
const {
52 bool NVPTXDAGToDAGISel::usePrecSqrtF32()
const {
56 bool NVPTXDAGToDAGISel::useF32FTZ()
const {
60 bool NVPTXDAGToDAGISel::allowFMA()
const {
65 bool NVPTXDAGToDAGISel::allowUnsafeFPMath()
const {
70 bool NVPTXDAGToDAGISel::useShortPointers()
const {
76 void NVPTXDAGToDAGISel::Select(
SDNode *
N) {
95 if (tryEXTRACT_VECTOR_ELEMENT(N))
104 if (tryLoadVector(N))
116 if (tryStoreVector(N))
128 if (tryStoreRetval(N))
136 if (tryStoreParam(N))
140 if (tryIntrinsicNoChain(N))
144 if (tryIntrinsicChain(N))
315 if (tryTextureIntrinsic(N))
483 if (trySurfaceIntrinsic(N))
494 SelectAddrSpaceCast(N);
497 if (tryConstantFP16(N))
506 bool NVPTXDAGToDAGISel::tryIntrinsicChain(
SDNode *N) {
507 unsigned IID = cast<ConstantSDNode>(N->
getOperand(1))->getZExtValue();
523 bool NVPTXDAGToDAGISel::tryConstantFP16(
SDNode *N) {
527 cast<ConstantFPSDNode>(N)->getValueAPF(),
SDLoc(N),
MVT::f16);
591 bool NVPTXDAGToDAGISel::SelectSETP_F16X2(
SDNode *N) {
592 unsigned PTXCmpMode =
604 bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(
SDNode *N) {
617 if (U->getOperand(0) != Vector)
620 dyn_cast<ConstantSDNode>(U->getOperand(1))) {
621 if (IdxConst->getZExtValue() == 0)
623 else if (IdxConst->getZExtValue() == 1)
635 unsigned Op = NVPTX::SplitF16x2;
640 Op = NVPTX::SplitI32toF16x2;
647 for (
auto *Node : E0)
649 for (
auto *Node : E1)
661 if (
auto *PT = dyn_cast<PointerType>(Src->
getType())) {
662 switch (PT->getAddressSpace()) {
710 if (
auto *A = dyn_cast<const Argument>(V))
711 return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
712 if (
auto *GV = dyn_cast<const GlobalVariable>(V))
713 return GV->isConstant();
718 bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(
SDNode *N) {
719 unsigned IID = cast<ConstantSDNode>(N->
getOperand(0))->getZExtValue();
724 SelectTexSurfHandle(N);
729 void NVPTXDAGToDAGISel::SelectTexSurfHandle(
SDNode *N) {
737 void NVPTXDAGToDAGISel::SelectAddrSpaceCast(
SDNode *N) {
742 assert(SrcAddrSpace != DstAddrSpace &&
743 "addrspacecast must be between different address spaces");
748 switch (SrcAddrSpace) {
751 Opc = TM.
is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
754 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
755 : NVPTX::cvta_shared_yes_64)
756 : NVPTX::cvta_shared_yes;
759 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
760 : NVPTX::cvta_const_yes_64)
761 : NVPTX::cvta_const_yes;
764 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
765 : NVPTX::cvta_local_yes_64)
766 : NVPTX::cvta_local_yes;
774 if (SrcAddrSpace != 0)
777 switch (DstAddrSpace) {
780 Opc = TM.
is64Bit() ? NVPTX::cvta_to_global_yes_64
781 : NVPTX::cvta_to_global_yes;
784 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
785 : NVPTX::cvta_to_shared_yes_64)
786 : NVPTX::cvta_to_shared_yes;
789 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
790 : NVPTX::cvta_to_const_yes_64)
791 : NVPTX::cvta_to_const_yes;
794 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
795 : NVPTX::cvta_to_local_yes_64)
796 : NVPTX::cvta_to_local_yes;
799 Opc = TM.
is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
838 bool NVPTXDAGToDAGISel::tryLoad(
SDNode *N) {
844 SDNode *NVPTXLD =
nullptr;
847 if (PlainLoad && PlainLoad->isIndexed())
850 if (!LoadedVT.isSimple())
867 unsigned int PointerSize =
885 MVT SimpleVT = LoadedVT.getSimpleVT();
889 unsigned int fromType;
899 if (PlainLoad && (PlainLoad->getExtensionType() ==
ISD::SEXTLOAD))
916 if (SelectDirectAddr(N1, Addr)) {
918 TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
919 NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
920 NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
923 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
924 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
925 getI32Imm(fromTypeWidth, dl), Addr, Chain };
928 }
else if (PointerSize == 64 ? SelectADDRsi64(N1.
getNode(), N1, Base,
Offset)
930 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
931 NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
932 NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
933 NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
936 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
937 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
938 getI32Imm(fromTypeWidth, dl), Base,
Offset, Chain };
941 }
else if (PointerSize == 64 ? SelectADDRri64(N1.
getNode(), N1, Base,
Offset)
943 if (PointerSize == 64)
945 TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
946 NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
947 NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
950 TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
951 NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
952 NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
955 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
956 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
957 getI32Imm(fromTypeWidth, dl), Base,
Offset, Chain };
961 if (PointerSize == 64)
963 TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
964 NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
965 NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
966 NVPTX::LD_f64_areg_64);
969 TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
970 NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
971 NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
974 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
975 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
976 getI32Imm(fromTypeWidth, dl), N1, Chain };
991 bool NVPTXDAGToDAGISel::tryLoadVector(
SDNode *N) {
1002 if (!LoadedVT.isSimple())
1008 return tryLDGLDU(N);
1011 unsigned int PointerSize =
1023 MVT SimpleVT = LoadedVT.getSimpleVT();
1036 unsigned ExtensionType = cast<ConstantSDNode>(
1071 if (SelectDirectAddr(Op1, Addr)) {
1077 NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1078 NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1079 NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
1080 NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1085 NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
None,
1086 NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
1087 NVPTX::LDV_f32_v4_avar,
None);
1092 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1093 getI32Imm(
VecType, DL), getI32Imm(FromType, DL),
1094 getI32Imm(FromTypeWidth, DL), Addr, Chain };
1096 }
else if (PointerSize == 64
1104 NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1105 NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1106 NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
1107 NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1112 NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
None,
1113 NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
1114 NVPTX::LDV_f32_v4_asi,
None);
1119 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1120 getI32Imm(
VecType, DL), getI32Imm(FromType, DL),
1121 getI32Imm(FromTypeWidth, DL), Base,
Offset, Chain };
1123 }
else if (PointerSize == 64
1126 if (PointerSize == 64) {
1133 NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
1134 NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
1135 NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
1136 NVPTX::LDV_f64_v2_ari_64);
1141 NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64,
None,
1142 NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
1143 NVPTX::LDV_f32_v4_ari_64,
None);
1152 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1153 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1154 NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
1155 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1160 NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
None,
1161 NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
1162 NVPTX::LDV_f32_v4_ari,
None);
1168 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1169 getI32Imm(
VecType, DL), getI32Imm(FromType, DL),
1170 getI32Imm(FromTypeWidth, DL), Base,
Offset, Chain };
1174 if (PointerSize == 64) {
1181 NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1182 NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
1183 NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1184 NVPTX::LDV_f64_v2_areg_64);
1189 NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64,
None,
1190 NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
1191 NVPTX::LDV_f32_v4_areg_64,
None);
1201 NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1202 NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
1203 NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
1204 NVPTX::LDV_f64_v2_areg);
1209 NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
None,
1210 NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
1211 NVPTX::LDV_f32_v4_areg,
None);
1217 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1218 getI32Imm(
VecType, DL), getI32Imm(FromType, DL),
1219 getI32Imm(FromTypeWidth, DL), Op1, Chain };
1230 bool NVPTXDAGToDAGISel::tryLDGLDU(
SDNode *N) {
1241 Mem = cast<MemIntrinsicSDNode>(
N);
1242 unsigned IID = cast<ConstantSDNode>(N->
getOperand(1))->getZExtValue();
1259 Mem = cast<MemSDNode>(
N);
1268 unsigned NumElts = 1;
1274 assert(NumElts % 2 == 0 &&
"Vector must have even number of elements");
1285 for (
unsigned i = 0; i != NumElts; ++i) {
1291 if (SelectDirectAddr(Op1, Addr)) {
1299 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1300 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1301 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1302 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1303 NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
1304 NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
1305 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1306 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1309 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1310 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1311 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1312 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1313 NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
1314 NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
1315 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1316 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1321 NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1322 NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1323 NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1324 NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1325 NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
1326 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
1327 NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1328 NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1332 NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1333 NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1334 NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1335 NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1336 NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
1337 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
1338 NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1339 NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1344 NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
1345 NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1346 NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar,
None,
1347 NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
1348 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
1349 NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar,
None);
1353 NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
1354 NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1355 NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar,
None,
1356 NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
1357 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
1358 NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar,
None);
1363 SDValue Ops[] = { Addr, Chain };
1375 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1376 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1377 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1378 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1379 NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
1380 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
1381 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1382 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1385 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1386 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1387 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1388 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1389 NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
1390 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
1391 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1392 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1397 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1398 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1399 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1400 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1401 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
1402 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
1403 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1404 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1408 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1409 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1410 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1411 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1412 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
1413 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
1414 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1415 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1420 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
1421 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1422 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64,
None,
1423 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
1424 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
1425 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64,
None);
1429 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
1430 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1431 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64,
None,
1432 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
1433 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
1434 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64,
None);
1445 NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1446 NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1447 NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1448 NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1449 NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
1450 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
1451 NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1452 NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1455 NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1456 NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1457 NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1458 NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1459 NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
1460 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
1461 NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1462 NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1467 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1468 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1469 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1470 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1471 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
1472 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
1473 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1474 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1478 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1479 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1480 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1481 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1482 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
1483 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
1484 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1485 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1490 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
1491 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1492 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32,
None,
1493 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
1494 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
1495 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32,
None);
1499 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
1500 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1501 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32,
None,
1502 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
1503 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
1504 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32,
None);
1521 NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1522 NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1523 NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1524 NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1525 NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
1526 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
1527 NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1528 NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1531 NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1532 NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1533 NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1534 NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1535 NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
1536 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
1537 NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1538 NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1543 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1544 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1545 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1546 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1547 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
1548 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
1549 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1550 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1554 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1555 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1556 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1557 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1558 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
1559 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
1560 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1561 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1566 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
1567 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1568 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64,
None,
1569 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
1570 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
1571 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64,
None);
1575 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
1576 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1577 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64,
None,
1578 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
1579 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
1580 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64,
None);
1591 NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1592 NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1593 NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1594 NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1595 NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
1596 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
1597 NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1598 NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1601 NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1602 NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1603 NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1604 NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1605 NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
1606 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
1607 NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1608 NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1613 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1614 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1615 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1616 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1617 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
1618 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
1619 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1620 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1624 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1625 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1626 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1627 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1628 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
1629 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
1630 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1631 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1636 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
1637 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1638 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32,
None,
1639 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
1640 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
1641 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32,
None);
1645 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
1646 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1647 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32,
None,
1648 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
1649 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
1650 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32,
None);
1656 SDValue Ops[] = { Op1, Chain };
1677 if (OrigType != EltVT && LdNode) {
1682 unsigned CvtOpc = GetConvertOpcode(OrigType.
getSimpleVT(),
1687 for (
unsigned i = 0; i != NumElts; ++i) {
1703 bool NVPTXDAGToDAGISel::tryStore(
SDNode *N) {
1709 assert((PlainStore || AtomicStore) &&
"Expected store");
1711 SDNode *NVPTXST =
nullptr;
1714 if (PlainStore && PlainStore->isIndexed())
1730 unsigned int PointerSize =
1757 unsigned int toType;
1767 SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
1775 if (SelectDirectAddr(BasePtr, Addr)) {
1776 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1777 NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1778 NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
1779 NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1783 getI32Imm(isVolatile, dl),
1784 getI32Imm(CodeAddrSpace, dl),
1785 getI32Imm(vecType, dl),
1786 getI32Imm(toType, dl),
1787 getI32Imm(toTypeWidth, dl),
1791 }
else if (PointerSize == 64
1794 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1795 NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1796 NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
1797 NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1801 getI32Imm(isVolatile, dl),
1802 getI32Imm(CodeAddrSpace, dl),
1803 getI32Imm(vecType, dl),
1804 getI32Imm(toType, dl),
1805 getI32Imm(toTypeWidth, dl),
1810 }
else if (PointerSize == 64
1813 if (PointerSize == 64)
1815 SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
1816 NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
1817 NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1819 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1820 NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1821 NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
1822 NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1827 getI32Imm(isVolatile, dl),
1828 getI32Imm(CodeAddrSpace, dl),
1829 getI32Imm(vecType, dl),
1830 getI32Imm(toType, dl),
1831 getI32Imm(toTypeWidth, dl),
1837 if (PointerSize == 64)
1839 pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1840 NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1841 NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
1842 NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1844 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1845 NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1846 NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
1847 NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1851 getI32Imm(isVolatile, dl),
1852 getI32Imm(CodeAddrSpace, dl),
1853 getI32Imm(vecType, dl),
1854 getI32Imm(toType, dl),
1855 getI32Imm(toTypeWidth, dl),
1870 bool NVPTXDAGToDAGISel::tryStoreVector(
SDNode *N) {
1887 unsigned int PointerSize =
1900 assert(StoreVT.isSimple() &&
"Store value is not simple");
1904 if (ScalarVT.isFloatingPoint())
1943 StOps.push_back(getI32Imm(IsVolatile, DL));
1944 StOps.push_back(getI32Imm(CodeAddrSpace, DL));
1945 StOps.push_back(getI32Imm(
VecType, DL));
1946 StOps.push_back(getI32Imm(ToType, DL));
1947 StOps.push_back(getI32Imm(ToTypeWidth, DL));
1949 if (SelectDirectAddr(N2, Addr)) {
1955 NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1956 NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1957 NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
1958 NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1963 NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar,
None,
1964 NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
1965 NVPTX::STV_f32_v4_avar,
None);
1968 StOps.push_back(Addr);
1969 }
else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base,
Offset)
1970 : SelectADDRsi(N2.getNode(), N2, Base,
Offset)) {
1976 NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1977 NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1978 NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
1979 NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1984 NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
None,
1985 NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
1986 NVPTX::STV_f32_v4_asi,
None);
1989 StOps.push_back(Base);
1990 StOps.push_back(Offset);
1991 }
else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base,
Offset)
1992 : SelectADDRri(N2.getNode(), N2, Base,
Offset)) {
1993 if (PointerSize == 64) {
2000 NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
2001 NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
2002 NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
2003 NVPTX::STV_f64_v2_ari_64);
2008 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64,
None,
2009 NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
2010 NVPTX::STV_f32_v4_ari_64,
None);
2019 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
2020 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
2021 NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
2022 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
2027 NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari,
None,
2028 NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
2029 NVPTX::STV_f32_v4_ari,
None);
2033 StOps.push_back(Base);
2034 StOps.push_back(Offset);
2036 if (PointerSize == 64) {
2043 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
2044 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
2045 NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
2046 NVPTX::STV_f64_v2_areg_64);
2051 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64,
None,
2052 NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
2053 NVPTX::STV_f32_v4_areg_64,
None);
2063 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
2064 NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
2065 NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
2066 NVPTX::STV_f64_v2_areg);
2071 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
None,
2072 NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
2073 NVPTX::STV_f32_v4_areg,
None);
2077 StOps.push_back(N2);
2083 StOps.push_back(Chain);
2094 bool NVPTXDAGToDAGISel::tryLoadParam(
SDNode *Node) {
2126 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2127 NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2128 NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
2129 NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2134 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2135 NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
2136 NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
2137 NVPTX::LoadParamMemV2F64);
2142 NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
None,
2143 NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
2144 NVPTX::LoadParamMemV4F32,
None);
2153 }
else if (VecSize == 2) {
2160 unsigned OffsetVal = cast<ConstantSDNode>(
Offset)->getZExtValue();
2171 bool NVPTXDAGToDAGISel::tryStoreRetval(
SDNode *N) {
2175 unsigned OffsetVal = cast<ConstantSDNode>(
Offset)->getZExtValue();
2179 unsigned NumElts = 1;
2196 for (
unsigned i = 0; i < NumElts; ++i)
2210 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2211 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2212 NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
2213 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2217 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2218 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2219 NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
2220 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2224 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2225 NVPTX::StoreRetvalV4I32,
None,
2226 NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
2227 NVPTX::StoreRetvalV4F32,
None);
2241 bool NVPTXDAGToDAGISel::tryStoreParam(
SDNode *N) {
2245 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2247 unsigned OffsetVal = cast<ConstantSDNode>(
Offset)->getZExtValue();
2252 unsigned NumElts = 1;
2271 for (
unsigned i = 0; i < NumElts; ++i)
2289 NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2290 NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2291 NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
2292 NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2296 NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2297 NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2298 NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
2299 NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2303 NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2304 NVPTX::StoreParamV4I32,
None,
2305 NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
2306 NVPTX::StoreParamV4F32,
None);
2316 Opcode = NVPTX::StoreParamI32;
2325 Opcode = NVPTX::StoreParamI32;
2345 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(
SDNode *N) {
2349 default:
return false;
2351 Opc = NVPTX::TEX_1D_F32_S32;
2354 Opc = NVPTX::TEX_1D_F32_F32;
2357 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2360 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2363 Opc = NVPTX::TEX_1D_S32_S32;
2366 Opc = NVPTX::TEX_1D_S32_F32;
2369 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
2372 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
2375 Opc = NVPTX::TEX_1D_U32_S32;
2378 Opc = NVPTX::TEX_1D_U32_F32;
2381 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
2384 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
2387 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
2390 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2393 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2396 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2399 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
2402 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
2405 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
2408 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
2411 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
2414 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
2417 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
2420 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
2423 Opc = NVPTX::TEX_2D_F32_S32;
2426 Opc = NVPTX::TEX_2D_F32_F32;
2429 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2432 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2435 Opc = NVPTX::TEX_2D_S32_S32;
2438 Opc = NVPTX::TEX_2D_S32_F32;
2441 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
2444 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
2447 Opc = NVPTX::TEX_2D_U32_S32;
2450 Opc = NVPTX::TEX_2D_U32_F32;
2453 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
2456 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
2459 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
2462 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2465 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2468 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2471 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
2474 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
2477 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
2480 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
2483 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
2486 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
2489 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
2492 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
2495 Opc = NVPTX::TEX_3D_F32_S32;
2498 Opc = NVPTX::TEX_3D_F32_F32;
2501 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2504 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2507 Opc = NVPTX::TEX_3D_S32_S32;
2510 Opc = NVPTX::TEX_3D_S32_F32;
2513 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
2516 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
2519 Opc = NVPTX::TEX_3D_U32_S32;
2522 Opc = NVPTX::TEX_3D_U32_F32;
2525 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
2528 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
2531 Opc = NVPTX::TEX_CUBE_F32_F32;
2534 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
2537 Opc = NVPTX::TEX_CUBE_S32_F32;
2540 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
2543 Opc = NVPTX::TEX_CUBE_U32_F32;
2546 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
2549 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
2552 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
2555 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
2558 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
2561 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
2564 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
2567 Opc = NVPTX::TLD4_R_2D_F32_F32;
2570 Opc = NVPTX::TLD4_G_2D_F32_F32;
2573 Opc = NVPTX::TLD4_B_2D_F32_F32;
2576 Opc = NVPTX::TLD4_A_2D_F32_F32;
2579 Opc = NVPTX::TLD4_R_2D_S32_F32;
2582 Opc = NVPTX::TLD4_G_2D_S32_F32;
2585 Opc = NVPTX::TLD4_B_2D_S32_F32;
2588 Opc = NVPTX::TLD4_A_2D_S32_F32;
2591 Opc = NVPTX::TLD4_R_2D_U32_F32;
2594 Opc = NVPTX::TLD4_G_2D_U32_F32;
2597 Opc = NVPTX::TLD4_B_2D_U32_F32;
2600 Opc = NVPTX::TLD4_A_2D_U32_F32;
2603 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
2606 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
2609 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
2612 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
2615 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
2618 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
2621 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
2624 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
2627 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
2630 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
2633 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
2636 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
2639 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
2642 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
2645 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
2648 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
2651 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
2654 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
2657 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
2660 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
2663 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
2666 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
2669 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
2672 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
2675 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
2678 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
2681 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
2684 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
2687 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
2690 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
2693 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
2696 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
2699 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
2702 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
2705 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
2708 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
2711 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
2714 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
2717 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
2720 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
2723 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
2726 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
2729 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
2732 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
2735 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
2738 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
2741 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
2744 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
2747 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
2750 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
2753 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
2756 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
2759 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
2762 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
2765 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
2768 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
2771 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
2774 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
2777 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
2780 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
2783 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
2786 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
2789 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
2792 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
2795 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
2798 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
2801 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
2804 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
2807 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
2810 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
2813 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
2816 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
2819 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
2822 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
2825 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
2828 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
2831 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
2834 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
2837 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
2840 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
2843 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
2846 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
2849 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
2852 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
2864 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(
SDNode *N) {
2867 default:
return false;
2869 Opc = NVPTX::SULD_1D_I8_CLAMP;
2872 Opc = NVPTX::SULD_1D_I16_CLAMP;
2875 Opc = NVPTX::SULD_1D_I32_CLAMP;
2878 Opc = NVPTX::SULD_1D_I64_CLAMP;
2881 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
2884 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
2887 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
2890 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
2893 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
2896 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
2899 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
2902 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
2905 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
2908 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
2911 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
2914 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
2917 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
2920 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
2923 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
2926 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
2929 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
2932 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
2935 Opc = NVPTX::SULD_2D_I8_CLAMP;
2938 Opc = NVPTX::SULD_2D_I16_CLAMP;
2941 Opc = NVPTX::SULD_2D_I32_CLAMP;
2944 Opc = NVPTX::SULD_2D_I64_CLAMP;
2947 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
2950 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
2953 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
2956 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
2959 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
2962 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
2965 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
2968 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
2971 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
2974 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
2977 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
2980 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
2983 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
2986 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
2989 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
2992 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
2995 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
2998 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3001 Opc = NVPTX::SULD_3D_I8_CLAMP;
3004 Opc = NVPTX::SULD_3D_I16_CLAMP;
3007 Opc = NVPTX::SULD_3D_I32_CLAMP;
3010 Opc = NVPTX::SULD_3D_I64_CLAMP;
3013 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3016 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3019 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3022 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3025 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3028 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3031 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3034 Opc = NVPTX::SULD_1D_I8_TRAP;
3037 Opc = NVPTX::SULD_1D_I16_TRAP;
3040 Opc = NVPTX::SULD_1D_I32_TRAP;
3043 Opc = NVPTX::SULD_1D_I64_TRAP;
3046 Opc = NVPTX::SULD_1D_V2I8_TRAP;
3049 Opc = NVPTX::SULD_1D_V2I16_TRAP;
3052 Opc = NVPTX::SULD_1D_V2I32_TRAP;
3055 Opc = NVPTX::SULD_1D_V2I64_TRAP;
3058 Opc = NVPTX::SULD_1D_V4I8_TRAP;
3061 Opc = NVPTX::SULD_1D_V4I16_TRAP;
3064 Opc = NVPTX::SULD_1D_V4I32_TRAP;
3067 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
3070 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
3073 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
3076 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
3079 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
3082 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
3085 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
3088 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
3091 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
3094 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
3097 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
3100 Opc = NVPTX::SULD_2D_I8_TRAP;
3103 Opc = NVPTX::SULD_2D_I16_TRAP;
3106 Opc = NVPTX::SULD_2D_I32_TRAP;
3109 Opc = NVPTX::SULD_2D_I64_TRAP;
3112 Opc = NVPTX::SULD_2D_V2I8_TRAP;
3115 Opc = NVPTX::SULD_2D_V2I16_TRAP;
3118 Opc = NVPTX::SULD_2D_V2I32_TRAP;
3121 Opc = NVPTX::SULD_2D_V2I64_TRAP;
3124 Opc = NVPTX::SULD_2D_V4I8_TRAP;
3127 Opc = NVPTX::SULD_2D_V4I16_TRAP;
3130 Opc = NVPTX::SULD_2D_V4I32_TRAP;
3133 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
3136 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
3139 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
3142 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
3145 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
3148 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
3151 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
3154 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
3157 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
3160 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
3163 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
3166 Opc = NVPTX::SULD_3D_I8_TRAP;
3169 Opc = NVPTX::SULD_3D_I16_TRAP;
3172 Opc = NVPTX::SULD_3D_I32_TRAP;
3175 Opc = NVPTX::SULD_3D_I64_TRAP;
3178 Opc = NVPTX::SULD_3D_V2I8_TRAP;
3181 Opc = NVPTX::SULD_3D_V2I16_TRAP;
3184 Opc = NVPTX::SULD_3D_V2I32_TRAP;
3187 Opc = NVPTX::SULD_3D_V2I64_TRAP;
3190 Opc = NVPTX::SULD_3D_V4I8_TRAP;
3193 Opc = NVPTX::SULD_3D_V4I16_TRAP;
3196 Opc = NVPTX::SULD_3D_V4I32_TRAP;
3199 Opc = NVPTX::SULD_1D_I8_ZERO;
3202 Opc = NVPTX::SULD_1D_I16_ZERO;
3205 Opc = NVPTX::SULD_1D_I32_ZERO;
3208 Opc = NVPTX::SULD_1D_I64_ZERO;
3211 Opc = NVPTX::SULD_1D_V2I8_ZERO;
3214 Opc = NVPTX::SULD_1D_V2I16_ZERO;
3217 Opc = NVPTX::SULD_1D_V2I32_ZERO;
3220 Opc = NVPTX::SULD_1D_V2I64_ZERO;
3223 Opc = NVPTX::SULD_1D_V4I8_ZERO;
3226 Opc = NVPTX::SULD_1D_V4I16_ZERO;
3229 Opc = NVPTX::SULD_1D_V4I32_ZERO;
3232 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
3235 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
3238 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
3241 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
3244 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
3247 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
3250 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
3253 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
3256 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
3259 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
3262 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
3265 Opc = NVPTX::SULD_2D_I8_ZERO;
3268 Opc = NVPTX::SULD_2D_I16_ZERO;
3271 Opc = NVPTX::SULD_2D_I32_ZERO;
3274 Opc = NVPTX::SULD_2D_I64_ZERO;
3277 Opc = NVPTX::SULD_2D_V2I8_ZERO;
3280 Opc = NVPTX::SULD_2D_V2I16_ZERO;
3283 Opc = NVPTX::SULD_2D_V2I32_ZERO;
3286 Opc = NVPTX::SULD_2D_V2I64_ZERO;
3289 Opc = NVPTX::SULD_2D_V4I8_ZERO;
3292 Opc = NVPTX::SULD_2D_V4I16_ZERO;
3295 Opc = NVPTX::SULD_2D_V4I32_ZERO;
3298 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
3301 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
3304 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
3307 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
3310 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
3313 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
3316 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
3319 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
3322 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
3325 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
3328 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
3331 Opc = NVPTX::SULD_3D_I8_ZERO;
3334 Opc = NVPTX::SULD_3D_I16_ZERO;
3337 Opc = NVPTX::SULD_3D_I32_ZERO;
3340 Opc = NVPTX::SULD_3D_I64_ZERO;
3343 Opc = NVPTX::SULD_3D_V2I8_ZERO;
3346 Opc = NVPTX::SULD_3D_V2I16_ZERO;
3349 Opc = NVPTX::SULD_3D_V2I32_ZERO;
3352 Opc = NVPTX::SULD_3D_V2I64_ZERO;
3355 Opc = NVPTX::SULD_3D_V4I8_ZERO;
3358 Opc = NVPTX::SULD_3D_V4I16_ZERO;
3361 Opc = NVPTX::SULD_3D_V4I32_ZERO;
3376 bool NVPTXDAGToDAGISel::tryBFE(
SDNode *N) {
3383 bool IsSigned =
false;
3388 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3421 if (NumBits > GoodBits) {
3455 if (isa<ConstantSDNode>(AndLHS)) {
3479 NumBits = NumZeros + NumOnes - ShiftAmt;
3485 if (ShiftAmt < NumZeros) {
3521 if (OuterShiftAmt < InnerShiftAmt) {
3557 Opc = NVPTX::BFE_S32rii;
3559 Opc = NVPTX::BFE_U32rii;
3563 Opc = NVPTX::BFE_S64rii;
3565 Opc = NVPTX::BFE_U64rii;
3598 return SelectDirectAddr(CastN->getOperand(0).getOperand(0),
Address);
3604 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3609 if (SelectDirectAddr(base, Base)) {
3620 bool NVPTXDAGToDAGISel::SelectADDRsi(
SDNode *OpNode,
SDValue Addr,
3622 return SelectADDRsi_imp(OpNode, Addr, Base, Offset,
MVT::i32);
3626 bool NVPTXDAGToDAGISel::SelectADDRsi64(
SDNode *OpNode,
SDValue Addr,
3628 return SelectADDRsi_imp(OpNode, Addr, Base, Offset,
MVT::i64);
3632 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3644 if (SelectDirectAddr(Addr.
getOperand(0), Addr)) {
3649 dyn_cast<FrameIndexSDNode>(Addr.
getOperand(0)))
3663 bool NVPTXDAGToDAGISel::SelectADDRri(
SDNode *OpNode,
SDValue Addr,
3665 return SelectADDRri_imp(OpNode, Addr, Base, Offset,
MVT::i32);
3669 bool NVPTXDAGToDAGISel::SelectADDRri64(
SDNode *OpNode,
SDValue Addr,
3671 return SelectADDRri_imp(OpNode, Addr, Base, Offset,
MVT::i64);
3674 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(
SDNode *N,
3675 unsigned int spN)
const {
3676 const Value *Src =
nullptr;
3677 if (
MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
3678 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3680 Src = mN->getMemOperand()->getValue();
3684 if (
auto *PT = dyn_cast<PointerType>(Src->
getType()))
3685 return (PT->getAddressSpace() == spN);
3692 const SDValue &
Op,
unsigned ConstraintID, std::vector<SDValue> &OutOps) {
3694 switch (ConstraintID) {
3698 if (SelectDirectAddr(Op, Op0)) {
3699 OutOps.push_back(Op0);
3703 if (SelectADDRri(Op.
getNode(),
Op, Op0, Op1)) {
3704 OutOps.push_back(Op0);
3705 OutOps.push_back(Op1);
3715 unsigned NVPTXDAGToDAGISel::GetConvertOpcode(
MVT DestTy,
MVT SrcTy,
3725 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3727 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3729 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3736 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3738 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3740 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3747 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3749 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3751 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3758 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3760 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3762 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
EVT getValueType() const
Return the ValueType of the referenced return value.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Atomic ordering constants.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
This class represents lattice values for constants.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
bool isVector() const
Return true if this is a vector value type.
void push_back(const T &Elt)
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
static Optional< unsigned > pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16, unsigned Opcode_i32, Optional< unsigned > Opcode_i64, unsigned Opcode_f16, unsigned Opcode_f16x2, unsigned Opcode_f32, Optional< unsigned > Opcode_f64)
SDVTList getVTList() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
const SDValue & getChain() const
bool useF32FTZ(const MachineFunction &MF) const
static unsigned int getCodeAddrSpace(MemSDNode *N)
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ)
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
void setNodeId(int Id)
Set unique node id.
SDNode * getNode() const
get the SDNode which holds the desired result
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const NVPTXSubtarget * Subtarget
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, CodeGenOpt::Level OptLevel)
A description of a memory reference used in the backend.
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
Shift and rotation operations.
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
op_iterator op_end() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
const DataLayout & getDataLayout() const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This is an SDNode representing atomic operations.
AtomicOrdering getOrdering() const
Return the atomic ordering requirements for this memory operation.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
int getDivF32Level() const
AtomicOrdering
Atomic ordering for LLVM's memory model.
unsigned getSizeInBits() const
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool usePrecSqrtF32() const
bool isKernelFunction(const Function &F)
Type * getType() const
All values are typed, get the type of this value.
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
SDValue getTargetFrameIndex(int FI, EVT VT)
const T & getValue() const LLVM_LVALUE_FUNCTION
Simple integer binary arithmetic operators.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
op_iterator op_begin() const
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
bool isStrongerThanMonotonic(AtomicOrdering ao)
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
SDValue getTargetConstantFP(double Val, const SDLoc &DL, EVT VT)
unsigned getSrcAddressSpace() const
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
This class is used to represent ISD::STORE nodes.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
const Value * getValue() const
Return the base address of the memory access.
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const
CodeGenOpt::Level OptLevel
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
const SDValue & getOperand(unsigned Num) const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getDestAddressSpace() const
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, unsigned CodeAddrSpace, MachineFunction *F)
static ManagedStatic< std::set< EVT, EVT::compareRawBits > > EVTs
FunctionPass class - This class is used to implement most global optimizations.
bool useShortPointers() const
bool allowUnsafeFPMath(MachineFunction &MF) const
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
EVT getVectorElementType() const
Given a vector type, return the type of each element.
ADDRSPACECAST - This operator converts between pointers of different address spaces.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
This is an abstract virtual class for memory operations.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
EVT getMemoryVT() const
Return the type of the in-memory value.
iterator_range< use_iterator > uses()
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
bool isVector() const
Return true if this is a vector value type.
FunctionPass * createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel)
createNVPTXISelDag - This pass converts a legalized DAG into a NVPTX-specific DAG, ready for instruction scheduling.
Bitwise operators - logical and, logical or, logical xor.
LLVM_NODISCARD bool empty() const
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
ISD::CondCode get() const
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
unsigned getOpcode() const
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
static bool isVolatile(Instruction *Inst)
const SDValue & getOperand(unsigned i) const
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
uint64_t getZExtValue() const
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand *> NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
const SDValue & getBasePtr() const
This class is used to represent ISD::LOAD nodes.