LLVM 8.0.1
PPCISelLowering.cpp
1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the PPCISelLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "PPCISelLowering.h"
16 #include "PPC.h"
17 #include "PPCCCState.h"
18 #include "PPCCallingConv.h"
19 #include "PPCFrameLowering.h"
20 #include "PPCInstrInfo.h"
21 #include "PPCMachineFunctionInfo.h"
22 #include "PPCPerfectShuffle.h"
23 #include "PPCRegisterInfo.h"
24 #include "PPCSubtarget.h"
25 #include "PPCTargetMachine.h"
26 #include "llvm/ADT/APFloat.h"
27 #include "llvm/ADT/APInt.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/None.h"
31 #include "llvm/ADT/STLExtras.h"
32 #include "llvm/ADT/SmallPtrSet.h"
33 #include "llvm/ADT/SmallSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/ADT/StringRef.h"
37 #include "llvm/ADT/StringSwitch.h"
57 #include "llvm/IR/CallSite.h"
58 #include "llvm/IR/CallingConv.h"
59 #include "llvm/IR/Constant.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DebugLoc.h"
63 #include "llvm/IR/DerivedTypes.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/GlobalValue.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/Intrinsics.h"
69 #include "llvm/IR/Module.h"
70 #include "llvm/IR/Type.h"
71 #include "llvm/IR/Use.h"
72 #include "llvm/IR/Value.h"
73 #include "llvm/MC/MCExpr.h"
74 #include "llvm/MC/MCRegisterInfo.h"
77 #include "llvm/Support/Casting.h"
78 #include "llvm/Support/CodeGen.h"
80 #include "llvm/Support/Compiler.h"
81 #include "llvm/Support/Debug.h"
83 #include "llvm/Support/Format.h"
84 #include "llvm/Support/KnownBits.h"
90 #include <algorithm>
91 #include <cassert>
92 #include <cstdint>
93 #include <iterator>
94 #include <list>
95 #include <utility>
96 #include <vector>
97 
98 using namespace llvm;
99 
100 #define DEBUG_TYPE "ppc-lowering"
101 
102 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
103 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
104 
105 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
106 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
107 
108 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
109 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
110 
111 static cl::opt<bool> DisableSCO("disable-ppc-sco",
112 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
113 
114 static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
115 cl::desc("enable quad precision float support on ppc"), cl::Hidden);
116 
117 STATISTIC(NumTailCalls, "Number of tail calls");
118 STATISTIC(NumSiblingCalls, "Number of sibling calls");
119 
120 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
121 
122 // FIXME: Remove this once the bug has been fixed!
124 
126  const PPCSubtarget &STI)
127  : TargetLowering(TM), Subtarget(STI) {
128  // Use _setjmp/_longjmp instead of setjmp/longjmp.
131 
132  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
133  // arguments are at least 4/8 bytes aligned.
134  bool isPPC64 = Subtarget.isPPC64();
135  setMinStackArgumentAlignment(isPPC64 ? 8:4);
136 
137  // Set up the register classes.
138  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
139  if (!useSoftFloat()) {
140  if (hasSPE()) {
141  addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass);
142  addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
143  } else {
144  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
145  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
146  }
147  }
148 
149  // Match BITREVERSE to customized fast code sequence in the td file.
152 
153  // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
155 
156  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
157  for (MVT VT : MVT::integer_valuetypes()) {
160  }
161 
163 
164  // PowerPC has pre-inc loads and stores.
175  if (!Subtarget.hasSPE()) {
180  }
181 
182  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
183  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
184  for (MVT VT : ScalarIntVTs) {
189  }
190 
191  if (Subtarget.useCRBits()) {
193 
194  if (isPPC64 || Subtarget.hasFPCVT()) {
197  isPPC64 ? MVT::i64 : MVT::i32);
200  isPPC64 ? MVT::i64 : MVT::i32);
201  } else {
204  }
205 
206  // PowerPC does not support direct load/store of condition registers.
209 
210  // FIXME: Remove this once the ANDI glue bug is fixed:
211  if (ANDIGlueBug)
213 
214  for (MVT VT : MVT::integer_valuetypes()) {
218  }
219 
220  addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
221  }
222 
223  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
224  // PPC (the libcall is not available).
227 
228  // We do not currently implement these libm ops for PowerPC.
235 
236  // PowerPC has no SREM/UREM instructions unless we are on P9
237  // On P9 we may use a hardware instruction to compute the remainder.
238  // The instructions are not legalized directly because in the cases where the
239  // result of both the remainder and the division is required it is more
240  // efficient to compute the remainder from the result of the division rather
241  // than use the remainder instruction.
242  if (Subtarget.isISA3_0()) {
245  setOperationAction(ISD::SREM, MVT::i64, Custom);
246  setOperationAction(ISD::UREM, MVT::i64, Custom);
247  } else {
250  setOperationAction(ISD::SREM, MVT::i64, Expand);
251  setOperationAction(ISD::UREM, MVT::i64, Expand);
252  }
253 
254  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
263 
264  // We don't support sin/cos/sqrt/fmod/pow
275  if (Subtarget.hasSPE()) {
278  } else {
281  }
282 
284 
285  // If we're enabling GP optimizations, use hardware square root
286  if (!Subtarget.hasFSQRT() &&
287  !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
288  Subtarget.hasFRE()))
290 
291  if (!Subtarget.hasFSQRT() &&
292  !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
293  Subtarget.hasFRES()))
295 
296  if (Subtarget.hasFCPSGN()) {
299  } else {
302  }
303 
304  if (Subtarget.hasFPRND()) {
309 
314  }
315 
316  // PowerPC does not have BSWAP, but we can use the vector BSWAP instruction xxbrd
317  // to speed up scalar BSWAP64.
318  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
320  if (Subtarget.hasP9Vector())
321  setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
322  else
323  setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
324  if (Subtarget.isISA3_0()) {
326  setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
327  } else {
329  setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
330  }
331 
332  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
334  setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
335  } else {
337  setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
338  }
339 
340  // PowerPC does not have ROTR
342  setOperationAction(ISD::ROTR, MVT::i64 , Expand);
343 
344  if (!Subtarget.useCRBits()) {
345  // PowerPC does not have Select
350  }
351 
352  // PowerPC wants to turn select_cc of FP into fsel when possible.
355 
356  // PowerPC wants to optimize integer setcc a bit
357  if (!Subtarget.useCRBits())
359 
360  // PowerPC does not have BRCOND which requires SetCC
361  if (!Subtarget.useCRBits())
363 
365 
366  if (Subtarget.hasSPE()) {
367  // SPE has built-in conversions
371  } else {
372  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
374 
375  // PowerPC does not have [U|S]INT_TO_FP
378  }
379 
380  if (Subtarget.hasDirectMove() && isPPC64) {
385  } else {
390  }
391 
392  // We cannot sextinreg(i1). Expand to shifts.
394 
395  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
396  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
397  // support continuation, user-level threading, and so on. As a result, no
398  // other SjLj exception interfaces are implemented, so please don't build
399  // your own exception handling based on them.
400  // LLVM/Clang supports zero-cost DWARF exception handling.
403 
404  // We want to legalize GlobalAddress and ConstantPool nodes into the
405  // appropriate instructions to materialize the address.
416 
417  // TRAP is legal.
419 
420  // TRAMPOLINE is custom lowered.
423 
424  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
426 
427  if (Subtarget.isSVR4ABI()) {
428  if (isPPC64) {
429  // VAARG always uses double-word chunks, so promote anything smaller.
431  AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
433  AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
439  } else {
440  // VAARG is custom lowered with the 32-bit SVR4 ABI.
443  }
444  } else
446 
447  if (Subtarget.isSVR4ABI() && !isPPC64)
448  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
450  else
452 
453  // Use the default implementation.
463 
464  // We want to custom lower some of our intrinsics.
466 
467  // To handle counter-based loop conditions.
469 
474 
475  // Comparisons that require checking two conditions.
476  if (Subtarget.hasSPE()) {
481  }
494 
495  if (Subtarget.has64BitSupport()) {
496  // They also have instructions for converting between i64 and fp.
501  // This is just the low 32 bits of a (signed) fp->i64 conversion.
502  // We cannot do this with Promote because i64 is not a legal type.
504 
505  if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
507  } else {
508  // PowerPC does not have FP_TO_UINT on 32-bit implementations.
509  if (Subtarget.hasSPE())
511  else
513  }
514 
515  // With the instructions enabled under FPCVT, we can do everything.
516  if (Subtarget.hasFPCVT()) {
517  if (Subtarget.has64BitSupport()) {
522  }
523 
528  }
529 
530  if (Subtarget.use64BitRegs()) {
531  // 64-bit PowerPC implementations can support i64 types directly
532  addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
533  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
535  // 64-bit PowerPC wants to expand i128 shifts itself.
539  } else {
540  // 32-bit PowerPC wants to expand i64 shifts itself.
544  }
545 
546  if (Subtarget.hasAltivec()) {
547  // First set operation action for all vector types to expand. Then we
548  // will selectively turn on ones that can be effectively codegen'd.
549  for (MVT VT : MVT::vector_valuetypes()) {
550  // add/sub are legal for all supported vector VT's.
554 
555  // Vector instructions introduced in P8
556  if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
559  }
560  else {
563  }
564 
565  // Vector instructions introduced in P9
566  if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
568  else
570 
571  // We promote all shuffles to v16i8.
574 
575  // We promote all non-typed operations to v4i32.
591 
592  // No other operations are legal.
630 
631  for (MVT InnerVT : MVT::vector_valuetypes()) {
632  setTruncStoreAction(VT, InnerVT, Expand);
633  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
634  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
635  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
636  }
637  }
638 
639  // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
640  // with merges, splats, etc.
642 
648  Subtarget.useCRBits() ? Legal : Expand);
658 
659  // Without hasP8Altivec set, v2i64 SMAX isn't available.
660  // But ABS custom lowering requires SMAX support.
661  if (!Subtarget.hasP8Altivec())
663 
664  addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
665  addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
666  addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
667  addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
668 
671 
672  if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
675  }
676 
677  if (Subtarget.hasP8Altivec())
679  else
681 
684 
687 
692 
693  // Altivec does not contain unordered floating-point compare instructions
698 
699  if (Subtarget.hasVSX()) {
702  if (Subtarget.hasP8Vector()) {
705  }
706  if (Subtarget.hasDirectMove() && isPPC64) {
715  }
717 
723 
725 
728 
731 
732  // Share the Altivec comparison restrictions.
737 
740 
742 
743  if (Subtarget.hasP8Vector())
744  addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
745 
746  addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
747 
748  addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
749  addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
750  addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
751 
752  if (Subtarget.hasP8Altivec()) {
756 
757  // 128 bit shifts can be accomplished via 3 instructions for SHL and
758  // SRL, but not for SRA because of the instructions available:
759  // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
760  // doing
764 
766  }
767  else {
771 
773 
774  // VSX v2i64 only supports non-arithmetic operations.
777  }
778 
783 
785 
790 
791  // Custom handling for partial vectors of integers converted to
792  // floating point. We already have optimal handling for v2i32 through
793  // the DAG combine, so those aren't necessary.
802 
807 
808  if (Subtarget.hasDirectMove())
811 
812  addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
813  }
814 
815  if (Subtarget.hasP8Altivec()) {
816  addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
817  addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
818  }
819 
820  if (Subtarget.hasP9Vector()) {
823 
824  // 128 bit shifts can be accomplished via 3 instructions for SHL and
825  // SRL, but not for SRA because of the instructions available:
826  // VS{RL} and VS{RL}O.
830 
831  if (EnableQuadPrecision) {
832  addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
838  // No extending loads to f128 on PPC.
839  for (MVT FPT : MVT::fp_valuetypes())
848 
855 
862  // No implementation for these ops for PowerPC.
868  }
869 
870  }
871 
872  if (Subtarget.hasP9Altivec()) {
875  }
876  }
877 
878  if (Subtarget.hasQPX()) {
883 
886 
889 
892 
893  if (!Subtarget.useCRBits())
896 
904 
907 
911 
922 
925 
928 
929  addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
930 
935 
938 
941 
942  if (!Subtarget.useCRBits())
945 
953 
956 
967 
970 
973 
974  addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
975 
979 
980  if (!Subtarget.useCRBits())
983 
986 
994 
997 
998  addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
999 
1004 
1009 
1012 
1013  // These need to set FE_INEXACT, and so cannot be vectorized here.
1016 
1017  if (TM.Options.UnsafeFPMath) {
1020 
1023  } else {
1026 
1029  }
1030  }
1031 
1032  if (Subtarget.has64BitSupport())
1034 
1035  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1036 
1037  if (!isPPC64) {
1040  }
1041 
1043 
1044  if (Subtarget.hasAltivec()) {
1045  // Altivec instructions set fields to all zeros or all ones.
1047  }
1048 
1049  if (!isPPC64) {
1050  // These libcalls are not available in 32-bit.
1051  setLibcallName(RTLIB::SHL_I128, nullptr);
1052  setLibcallName(RTLIB::SRL_I128, nullptr);
1053  setLibcallName(RTLIB::SRA_I128, nullptr);
1054  }
1055 
1056  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1057 
1058  // We have target-specific dag combine patterns for the following nodes:
1065  if (Subtarget.hasFPCVT())
1070  if (Subtarget.useCRBits())
1076 
1080 
1082 
1083  if (Subtarget.useCRBits()) {
1087  }
1088 
1089  // Use reciprocal estimates.
1090  if (TM.Options.UnsafeFPMath) {
1093  }
1094 
1095  if (Subtarget.hasP9Altivec()) {
1098  }
1099 
1100  // Darwin long double math library functions have $LDBL128 appended.
1101  if (Subtarget.isDarwin()) {
1102  setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
1103  setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
1104  setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
1105  setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
1106  setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
1107  setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
1108  setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
1109  setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
1110  setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
1111  setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
1112  }
1113 
1114  if (EnableQuadPrecision) {
1115  setLibcallName(RTLIB::LOG_F128, "logf128");
1116  setLibcallName(RTLIB::LOG2_F128, "log2f128");
1117  setLibcallName(RTLIB::LOG10_F128, "log10f128");
1118  setLibcallName(RTLIB::EXP_F128, "expf128");
1119  setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1120  setLibcallName(RTLIB::SIN_F128, "sinf128");
1121  setLibcallName(RTLIB::COS_F128, "cosf128");
1122  setLibcallName(RTLIB::POW_F128, "powf128");
1123  setLibcallName(RTLIB::FMIN_F128, "fminf128");
1124  setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1125  setLibcallName(RTLIB::POWI_F128, "__powikf2");
1126  setLibcallName(RTLIB::REM_F128, "fmodf128");
1127  }
1128 
1129  // With 32 condition bits, we don't need to sink (and duplicate) compares
1130  // aggressively in CodeGenPrep.
1131  if (Subtarget.useCRBits()) {
1134  }
1135 
1137  if (Subtarget.isDarwin())
1139 
1140  switch (Subtarget.getDarwinDirective()) {
1141  default: break;
1142  case PPC::DIR_970:
1143  case PPC::DIR_A2:
1144  case PPC::DIR_E500:
1145  case PPC::DIR_E500mc:
1146  case PPC::DIR_E5500:
1147  case PPC::DIR_PWR4:
1148  case PPC::DIR_PWR5:
1149  case PPC::DIR_PWR5X:
1150  case PPC::DIR_PWR6:
1151  case PPC::DIR_PWR6X:
1152  case PPC::DIR_PWR7:
1153  case PPC::DIR_PWR8:
1154  case PPC::DIR_PWR9:
1157  break;
1158  }
1159 
1160  if (Subtarget.enableMachineScheduler())
1162  else
1164 
1166 
1167  // The Freescale cores do better with aggressive inlining of memcpy and
1168  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1169  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
1170  Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
1171  MaxStoresPerMemset = 32;
1173  MaxStoresPerMemcpy = 32;
1175  MaxStoresPerMemmove = 32;
1177  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
1178  // The A2 also benefits from (very) aggressive inlining of memcpy and
1179  // friends. The overhead of the function call, even when warm, can be
1180  // over one hundred cycles.
1181  MaxStoresPerMemset = 128;
1182  MaxStoresPerMemcpy = 128;
1183  MaxStoresPerMemmove = 128;
1184  MaxLoadsPerMemcmp = 128;
1185  } else {
1186  MaxLoadsPerMemcmp = 8;
1188  }
1189 }
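// The operation actions registered above drive SelectionDAG legalization:
// Legal nodes are selected as-is, Expand nodes are rewritten in terms of
// other operations (or a libcall), Custom nodes are routed to
// LowerOperation(), and Promote nodes are first widened to the indicated
// type. As a rough sketch (illustrative, not part of this file), a pre-P9
// subtarget configured above would answer:
//
//   getOperationAction(ISD::SREM, MVT::i32)  -> Expand
//   getOperationAction(ISD::BSWAP, MVT::i64) -> Expand
//
// so a 32-bit srem is expanded into a divide/multiply/subtract sequence
// instead of being matched to a single instruction.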
1190 
1191 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1192 /// the desired ByVal argument alignment.
1193 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
1194  unsigned MaxMaxAlign) {
1195  if (MaxAlign == MaxMaxAlign)
1196  return;
1197  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1198  if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
1199  MaxAlign = 32;
1200  else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
1201  MaxAlign = 16;
1202  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1203  unsigned EltAlign = 0;
1204  getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1205  if (EltAlign > MaxAlign)
1206  MaxAlign = EltAlign;
1207  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1208  for (auto *EltTy : STy->elements()) {
1209  unsigned EltAlign = 0;
1210  getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1211  if (EltAlign > MaxAlign)
1212  MaxAlign = EltAlign;
1213  if (MaxAlign == MaxMaxAlign)
1214  break;
1215  }
1216  }
1217 }
1218 
1219 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1220 /// function arguments in the caller parameter area.
1222  const DataLayout &DL) const {
1223  // Darwin passes everything on a 4-byte boundary.
1224  if (Subtarget.isDarwin())
1225  return 4;
1226 
1227  // 16-byte and wider vectors are passed on a 16-byte boundary.
1228  // The rest are passed on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1229  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
1230  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
1231  getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
1232  return Align;
1233 }
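// Hypothetical illustration (not from the original source): on a non-Darwin
// PPC64 subtarget with Altivec but no QPX, a by-value aggregate containing a
// 128-bit vector member is aligned to 16 bytes, while a purely scalar
// aggregate keeps the default of 8 bytes (4 on PPC32). With QPX enabled, a
// 256-bit vector member can raise the result to 32 bytes.
//
//   struct { int a; __vector int v; }   -> getByValTypeAlignment == 16
//   struct { int a; long long b; }      -> getByValTypeAlignment == 8 (PPC64)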
1234 
1236  CallingConv::ID CC,
1237  EVT VT) const {
1238  if (Subtarget.hasSPE() && VT == MVT::f64)
1239  return 2;
1240  return PPCTargetLowering::getNumRegisters(Context, VT);
1241 }
1242 
1244  CallingConv::ID CC,
1245  EVT VT) const {
1246  if (Subtarget.hasSPE() && VT == MVT::f64)
1247  return MVT::i32;
1248  return PPCTargetLowering::getRegisterType(Context, VT);
1249 }
1250 
1252  return Subtarget.useSoftFloat();
1253 }
1254 
1256  return Subtarget.hasSPE();
1257 }
1258 
1259 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1260  switch ((PPCISD::NodeType)Opcode) {
1261  case PPCISD::FIRST_NUMBER: break;
1262  case PPCISD::FSEL: return "PPCISD::FSEL";
1263  case PPCISD::FCFID: return "PPCISD::FCFID";
1264  case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1265  case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1266  case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1267  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1268  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1269  case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1270  case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1272  return "PPCISD::FP_TO_UINT_IN_VSR";
1274  return "PPCISD::FP_TO_SINT_IN_VSR";
1275  case PPCISD::FRE: return "PPCISD::FRE";
1276  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1277  case PPCISD::STFIWX: return "PPCISD::STFIWX";
1278  case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
1279  case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
1280  case PPCISD::VPERM: return "PPCISD::VPERM";
1281  case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1282  case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1283  case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE";
1284  case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1285  case PPCISD::VECSHL: return "PPCISD::VECSHL";
1286  case PPCISD::CMPB: return "PPCISD::CMPB";
1287  case PPCISD::Hi: return "PPCISD::Hi";
1288  case PPCISD::Lo: return "PPCISD::Lo";
1289  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1290  case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1291  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1292  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1293  case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1294  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1295  case PPCISD::SRL: return "PPCISD::SRL";
1296  case PPCISD::SRA: return "PPCISD::SRA";
1297  case PPCISD::SHL: return "PPCISD::SHL";
1298  case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1299  case PPCISD::CALL: return "PPCISD::CALL";
1300  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1301  case PPCISD::MTCTR: return "PPCISD::MTCTR";
1302  case PPCISD::BCTRL: return "PPCISD::BCTRL";
1303  case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1304  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
1305  case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1306  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1307  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1308  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1309  case PPCISD::MFVSR: return "PPCISD::MFVSR";
1310  case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1311  case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1312  case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1313  case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1314  case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
1315  case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
1316  case PPCISD::VCMP: return "PPCISD::VCMP";
1317  case PPCISD::VCMPo: return "PPCISD::VCMPo";
1318  case PPCISD::LBRX: return "PPCISD::LBRX";
1319  case PPCISD::STBRX: return "PPCISD::STBRX";
1320  case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1321  case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1322  case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1323  case PPCISD::STXSIX: return "PPCISD::STXSIX";
1324  case PPCISD::VEXTS: return "PPCISD::VEXTS";
1325  case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
1326  case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1327  case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1329  return "PPCISD::ST_VSR_SCAL_INT";
1330  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1331  case PPCISD::BDNZ: return "PPCISD::BDNZ";
1332  case PPCISD::BDZ: return "PPCISD::BDZ";
1333  case PPCISD::MFFS: return "PPCISD::MFFS";
1334  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1335  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1336  case PPCISD::CR6SET: return "PPCISD::CR6SET";
1337  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1338  case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1339  case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1340  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1341  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1342  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1343  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1344  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1345  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1346  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1347  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1348  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1349  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1350  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1351  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1352  case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1353  case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1354  case PPCISD::SC: return "PPCISD::SC";
1355  case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1356  case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1357  case PPCISD::RFEBB: return "PPCISD::RFEBB";
1358  case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1359  case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1360  case PPCISD::VABSD: return "PPCISD::VABSD";
1361  case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
1362  case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
1363  case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
1364  case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
1365  case PPCISD::QBFLT: return "PPCISD::QBFLT";
1366  case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
1367  case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1368  case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1369  }
1370  return nullptr;
1371 }
1372 
1374  EVT VT) const {
1375  if (!VT.isVector())
1376  return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1377 
1378  if (Subtarget.hasQPX())
1380 
1382 }
1383 
1385  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1386  return true;
1387 }
1388 
1389 //===----------------------------------------------------------------------===//
1390 // Node matching predicates, for use by the tblgen matching code.
1391 //===----------------------------------------------------------------------===//
1392 
1393 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1395  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1396  return CFP->getValueAPF().isZero();
1397  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1398  // Maybe this has already been legalized into the constant pool?
1399  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1400  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1401  return CFP->getValueAPF().isZero();
1402  }
1403  return false;
1404 }
1405 
1406 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1407 /// true if Op is undef or if it matches the specified value.
1408 static bool isConstantOrUndef(int Op, int Val) {
1409  return Op < 0 || Op == Val;
1410 }
1411 
1412 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1413 /// VPKUHUM instruction.
1414 /// The ShuffleKind distinguishes between big-endian operations with
1415 /// two different inputs (0), either-endian operations with two identical
1416 /// inputs (1), and little-endian operations with two different inputs (2).
1417 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1419  SelectionDAG &DAG) {
1420  bool IsLE = DAG.getDataLayout().isLittleEndian();
1421  if (ShuffleKind == 0) {
1422  if (IsLE)
1423  return false;
1424  for (unsigned i = 0; i != 16; ++i)
1425  if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1426  return false;
1427  } else if (ShuffleKind == 2) {
1428  if (!IsLE)
1429  return false;
1430  for (unsigned i = 0; i != 16; ++i)
1431  if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1432  return false;
1433  } else if (ShuffleKind == 1) {
1434  unsigned j = IsLE ? 0 : 1;
1435  for (unsigned i = 0; i != 8; ++i)
1436  if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1437  !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1438  return false;
1439  }
1440  return true;
1441 }
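// Hypothetical illustration (not from the original source): for ShuffleKind 0
// on a big-endian target, VPKUHUM keeps the odd (low-order) byte of every
// halfword of the two concatenated inputs, so isVPKUHUMShuffleMask accepts
// the v16i8 mask below.
static const int HypotheticalVPKUHUMMaskBE[16] = {1,  3,  5,  7,  9,  11, 13, 15,
                                                  17, 19, 21, 23, 25, 27, 29, 31};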
1442 
1443 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1444 /// VPKUWUM instruction.
1445 /// The ShuffleKind distinguishes between big-endian operations with
1446 /// two different inputs (0), either-endian operations with two identical
1447 /// inputs (1), and little-endian operations with two different inputs (2).
1448 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1450  SelectionDAG &DAG) {
1451  bool IsLE = DAG.getDataLayout().isLittleEndian();
1452  if (ShuffleKind == 0) {
1453  if (IsLE)
1454  return false;
1455  for (unsigned i = 0; i != 16; i += 2)
1456  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1457  !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1458  return false;
1459  } else if (ShuffleKind == 2) {
1460  if (!IsLE)
1461  return false;
1462  for (unsigned i = 0; i != 16; i += 2)
1463  if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1464  !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1465  return false;
1466  } else if (ShuffleKind == 1) {
1467  unsigned j = IsLE ? 0 : 2;
1468  for (unsigned i = 0; i != 8; i += 2)
1469  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1470  !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1471  !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1472  !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1473  return false;
1474  }
1475  return true;
1476 }
1477 
1478 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1479 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1480 /// current subtarget.
1481 ///
1482 /// The ShuffleKind distinguishes between big-endian operations with
1483 /// two different inputs (0), either-endian operations with two identical
1484 /// inputs (1), and little-endian operations with two different inputs (2).
1485 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1487  SelectionDAG &DAG) {
1488  const PPCSubtarget& Subtarget =
1489  static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1490  if (!Subtarget.hasP8Vector())
1491  return false;
1492 
1493  bool IsLE = DAG.getDataLayout().isLittleEndian();
1494  if (ShuffleKind == 0) {
1495  if (IsLE)
1496  return false;
1497  for (unsigned i = 0; i != 16; i += 4)
1498  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1499  !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1500  !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1501  !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1502  return false;
1503  } else if (ShuffleKind == 2) {
1504  if (!IsLE)
1505  return false;
1506  for (unsigned i = 0; i != 16; i += 4)
1507  if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1508  !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1509  !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1510  !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1511  return false;
1512  } else if (ShuffleKind == 1) {
1513  unsigned j = IsLE ? 0 : 4;
1514  for (unsigned i = 0; i != 8; i += 4)
1515  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1516  !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1517  !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1518  !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1519  !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1520  !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1521  !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1522  !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1523  return false;
1524  }
1525  return true;
1526 }
1527 
1528 /// isVMerge - Common function, used to match vmrg* shuffles.
1529 ///
1530 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1531  unsigned LHSStart, unsigned RHSStart) {
1532  if (N->getValueType(0) != MVT::v16i8)
1533  return false;
1534  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1535  "Unsupported merge size!");
1536 
1537  for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1538  for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1539  if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1540  LHSStart+j+i*UnitSize) ||
1541  !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1542  RHSStart+j+i*UnitSize))
1543  return false;
1544  }
1545  return true;
1546 }
1547 
1548 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1549 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1550 /// The ShuffleKind distinguishes between big-endian merges with two
1551 /// different inputs (0), either-endian merges with two identical inputs (1),
1552 /// and little-endian merges with two different inputs (2). For the latter,
1553 /// the input operands are swapped (see PPCInstrAltivec.td).
1555  unsigned ShuffleKind, SelectionDAG &DAG) {
1556  if (DAG.getDataLayout().isLittleEndian()) {
1557  if (ShuffleKind == 1) // unary
1558  return isVMerge(N, UnitSize, 0, 0);
1559  else if (ShuffleKind == 2) // swapped
1560  return isVMerge(N, UnitSize, 0, 16);
1561  else
1562  return false;
1563  } else {
1564  if (ShuffleKind == 1) // unary
1565  return isVMerge(N, UnitSize, 8, 8);
1566  else if (ShuffleKind == 0) // normal
1567  return isVMerge(N, UnitSize, 8, 24);
1568  else
1569  return false;
1570  }
1571 }
1572 
1573 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1574 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1575 /// The ShuffleKind distinguishes between big-endian merges with two
1576 /// different inputs (0), either-endian merges with two identical inputs (1),
1577 /// and little-endian merges with two different inputs (2). For the latter,
1578 /// the input operands are swapped (see PPCInstrAltivec.td).
1580  unsigned ShuffleKind, SelectionDAG &DAG) {
1581  if (DAG.getDataLayout().isLittleEndian()) {
1582  if (ShuffleKind == 1) // unary
1583  return isVMerge(N, UnitSize, 8, 8);
1584  else if (ShuffleKind == 2) // swapped
1585  return isVMerge(N, UnitSize, 8, 24);
1586  else
1587  return false;
1588  } else {
1589  if (ShuffleKind == 1) // unary
1590  return isVMerge(N, UnitSize, 0, 0);
1591  else if (ShuffleKind == 0) // normal
1592  return isVMerge(N, UnitSize, 0, 16);
1593  else
1594  return false;
1595  }
1596 }
1597 
1598 /**
1599  * Common function used to match vmrgew and vmrgow shuffles
1600  *
1601  * The indexOffset determines whether to look for even or odd words in
1602  * the shuffle mask. This is based on the endianness of the target
1603  * machine.
1604  * - Little Endian:
1605  * - Use offset of 0 to check for odd elements
1606  * - Use offset of 4 to check for even elements
1607  * - Big Endian:
1608  * - Use offset of 0 to check for even elements
1609  * - Use offset of 4 to check for odd elements
1610  * A detailed description of the vector element ordering for little endian and
1611  * big endian can be found at
1612  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1613  * Targeting your applications - what little endian and big endian IBM XL C/C++
1614  * compiler differences mean to you
1615  *
1616  * The mask to the shuffle vector instruction specifies the indices of the
1617  * elements from the two input vectors to place in the result. The elements are
1618  * numbered in array-access order, starting with the first vector. These vectors
1619  * are always of type v16i8, thus each vector will contain 16 elements of 8 bits
1620  * each. More info on the shuffle vector can be found in the
1621  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1622  * Language Reference.
1623  *
1624  * The RHSStartValue indicates whether the same input vectors are used (unary)
1625  * or two different input vectors are used, based on the following:
1626  * - If the instruction uses the same vector for both inputs, the range of the
1627  * indices will be 0 to 15. In this case, the RHSStart value passed should
1628  * be 0.
1629  * - If the instruction has two different vectors then the range of the
1630  * indices will be 0 to 31. In this case, the RHSStart value passed should
1631  * be 16 (indices 0-15 specify elements in the first vector while indices 16
1632  * to 31 specify elements in the second vector).
1633  *
1634  * \param[in] N The shuffle vector SD Node to analyze
1635  * \param[in] IndexOffset Specifies whether to look for even or odd elements
1636  * \param[in] RHSStartValue Specifies the starting index for the righthand input
1637  * vector to the shuffle_vector instruction
1638  * \return true iff this shuffle vector represents an even or odd word merge
1639  */
1640 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1641  unsigned RHSStartValue) {
1642  if (N->getValueType(0) != MVT::v16i8)
1643  return false;
1644 
1645  for (unsigned i = 0; i < 2; ++i)
1646  for (unsigned j = 0; j < 4; ++j)
1647  if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1648  i*RHSStartValue+j+IndexOffset) ||
1649  !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1650  i*RHSStartValue+j+IndexOffset+8))
1651  return false;
1652  return true;
1653 }
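// Hypothetical illustration (not from the original source): a big-endian
// even-word merge (vmrgew), i.e. isVMerge(N, /*IndexOffset=*/0,
// /*RHSStartValue=*/16), matches the mask below, which interleaves the even
// words of the two inputs as A0, B0, A2, B2.
static const int HypotheticalVMRGEWMaskBE[16] = {0,  1,  2,  3,  16, 17, 18, 19,
                                                 8,  9,  10, 11, 24, 25, 26, 27};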
1654 
1655 /**
1656  * Determine if the specified shuffle mask is suitable for the vmrgew or
1657  * vmrgow instructions.
1658  *
1659  * \param[in] N The shuffle vector SD Node to analyze
1660  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1661  * \param[in] ShuffleKind Identify the type of merge:
1662  * - 0 = big-endian merge with two different inputs;
1663  * - 1 = either-endian merge with two identical inputs;
1664  * - 2 = little-endian merge with two different inputs (inputs are swapped for
1665  * little-endian merges).
1666  * \param[in] DAG The current SelectionDAG
1667  * \return true iff this shuffle mask represents the requested even or odd word merge
1668  */
1670  unsigned ShuffleKind, SelectionDAG &DAG) {
1671  if (DAG.getDataLayout().isLittleEndian()) {
1672  unsigned indexOffset = CheckEven ? 4 : 0;
1673  if (ShuffleKind == 1) // Unary
1674  return isVMerge(N, indexOffset, 0);
1675  else if (ShuffleKind == 2) // swapped
1676  return isVMerge(N, indexOffset, 16);
1677  else
1678  return false;
1679  }
1680  else {
1681  unsigned indexOffset = CheckEven ? 0 : 4;
1682  if (ShuffleKind == 1) // Unary
1683  return isVMerge(N, indexOffset, 0);
1684  else if (ShuffleKind == 0) // Normal
1685  return isVMerge(N, indexOffset, 16);
1686  else
1687  return false;
1688  }
1689  return false;
1690 }
1691 
1692 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1693 /// amount, otherwise return -1.
1694 /// The ShuffleKind distinguishes between big-endian operations with two
1695 /// different inputs (0), either-endian operations with two identical inputs
1696 /// (1), and little-endian operations with two different inputs (2). For the
1697 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
1698 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1699  SelectionDAG &DAG) {
1700  if (N->getValueType(0) != MVT::v16i8)
1701  return -1;
1702 
1703  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1704 
1705  // Find the first non-undef value in the shuffle mask.
1706  unsigned i;
1707  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1708  /*search*/;
1709 
1710  if (i == 16) return -1; // all undef.
1711 
1712  // Otherwise, check to see if the rest of the elements are consecutively
1713  // numbered from this value.
1714  unsigned ShiftAmt = SVOp->getMaskElt(i);
1715  if (ShiftAmt < i) return -1;
1716 
1717  ShiftAmt -= i;
1718  bool isLE = DAG.getDataLayout().isLittleEndian();
1719 
1720  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1721  // Check the rest of the elements to see if they are consecutive.
1722  for (++i; i != 16; ++i)
1723  if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1724  return -1;
1725  } else if (ShuffleKind == 1) {
1726  // Check the rest of the elements to see if they are consecutive.
1727  for (++i; i != 16; ++i)
1728  if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1729  return -1;
1730  } else
1731  return -1;
1732 
1733  if (isLE)
1734  ShiftAmt = 16 - ShiftAmt;
1735 
1736  return ShiftAmt;
1737 }
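// Hypothetical illustration (not from the original source): with ShuffleKind 0
// on a big-endian target, the mask below is the consecutive run 3..18, so
// isVSLDOIShuffleMask returns a shift amount of 3 (a vsldoi by 3 bytes).
static const int HypotheticalVSLDOIMaskBE[16] = {3,  4,  5,  6,  7,  8,  9,  10,
                                                 11, 12, 13, 14, 15, 16, 17, 18};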
1738 
1739 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1740 /// specifies a splat of a single element that is suitable for input to
1741 /// VSPLTB/VSPLTH/VSPLTW.
1743  assert(N->getValueType(0) == MVT::v16i8 &&
1744  (EltSize == 1 || EltSize == 2 || EltSize == 4));
1745 
1746  // The consecutive indices need to specify an element, not part of two
1747  // different elements. So abandon ship early if this isn't the case.
1748  if (N->getMaskElt(0) % EltSize != 0)
1749  return false;
1750 
1751  // This is a splat operation if each element of the permute is the same, and
1752  // if the value doesn't reference the second vector.
1753  unsigned ElementBase = N->getMaskElt(0);
1754 
1755  // FIXME: Handle UNDEF elements too!
1756  if (ElementBase >= 16)
1757  return false;
1758 
1759  // Check that the indices are consecutive, in the case of a multi-byte element
1760  // splatted with a v16i8 mask.
1761  for (unsigned i = 1; i != EltSize; ++i)
1762  if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1763  return false;
1764 
1765  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1766  if (N->getMaskElt(i) < 0) continue;
1767  for (unsigned j = 0; j != EltSize; ++j)
1768  if (N->getMaskElt(i+j) != N->getMaskElt(j))
1769  return false;
1770  }
1771  return true;
1772 }
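// Hypothetical illustration (not from the original source): with EltSize == 4,
// the mask below replicates word element 1 into all four words, so
// isSplatShuffleMask returns true and the splat can be done with vspltw.
static const int HypotheticalSplatWordMask[16] = {4, 5, 6, 7, 4, 5, 6, 7,
                                                  4, 5, 6, 7, 4, 5, 6, 7};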
1773 
1774 /// Check that the mask is shuffling N byte elements. Within each N byte
1775 /// element of the mask, the indices could be either in increasing or
1776 /// decreasing order as long as they are consecutive.
1777 /// \param[in] N the shuffle vector SD Node to analyze
1778 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1779 /// Word/DoubleWord/QuadWord).
1780 /// \param[in] StepLen the delta indices number among the N byte element, if
1781 /// the mask is in increasing/decreasing order then it is 1/-1.
1782 /// \return true iff the mask is shuffling N byte elements.
1783 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1784  int StepLen) {
1785  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1786  "Unexpected element width.");
1787  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
1788 
1789  unsigned NumOfElem = 16 / Width;
1790  unsigned MaskVal[16]; // Width is never greater than 16
1791  for (unsigned i = 0; i < NumOfElem; ++i) {
1792  MaskVal[0] = N->getMaskElt(i * Width);
1793  if ((StepLen == 1) && (MaskVal[0] % Width)) {
1794  return false;
1795  } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1796  return false;
1797  }
1798 
1799  for (unsigned int j = 1; j < Width; ++j) {
1800  MaskVal[j] = N->getMaskElt(i * Width + j);
1801  if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1802  return false;
1803  }
1804  }
1805  }
1806 
1807  return true;
1808 }
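// Hypothetical illustration (not from the original source): with Width == 4
// and StepLen == 1, the mask below is accepted because every 4-byte group
// starts at a multiple of 4 and its indices increase consecutively; with
// StepLen == -1 a byte-reversed group such as {3, 2, 1, 0} would be required
// instead.
static const int HypotheticalWordElemMask[16] = {8,  9,  10, 11, 0,  1,  2,  3,
                                                 20, 21, 22, 23, 4,  5,  6,  7};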
1809 
1810 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1811  unsigned &InsertAtByte, bool &Swap, bool IsLE) {
1812  if (!isNByteElemShuffleMask(N, 4, 1))
1813  return false;
1814 
1815  // Now we look at mask elements 0,4,8,12
1816  unsigned M0 = N->getMaskElt(0) / 4;
1817  unsigned M1 = N->getMaskElt(4) / 4;
1818  unsigned M2 = N->getMaskElt(8) / 4;
1819  unsigned M3 = N->getMaskElt(12) / 4;
1820  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
1821  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
1822 
1823  // Below, let H and L be arbitrary elements of the shuffle mask
1824  // where H is in the range [4,7] and L is in the range [0,3].
1825  // H, 1, 2, 3 or L, 5, 6, 7
1826  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
1827  (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
1828  ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
1829  InsertAtByte = IsLE ? 12 : 0;
1830  Swap = M0 < 4;
1831  return true;
1832  }
1833  // 0, H, 2, 3 or 4, L, 6, 7
1834  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
1835  (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
1836  ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
1837  InsertAtByte = IsLE ? 8 : 4;
1838  Swap = M1 < 4;
1839  return true;
1840  }
1841  // 0, 1, H, 3 or 4, 5, L, 7
1842  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
1843  (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
1844  ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
1845  InsertAtByte = IsLE ? 4 : 8;
1846  Swap = M2 < 4;
1847  return true;
1848  }
1849  // 0, 1, 2, H or 4, 5, 6, L
1850  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
1851  (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
1852  ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
1853  InsertAtByte = IsLE ? 0 : 12;
1854  Swap = M3 < 4;
1855  return true;
1856  }
1857 
1858  // If both vector operands for the shuffle are the same vector, the mask will
1859  // contain only elements from the first one and the second one will be undef.
1860  if (N->getOperand(1).isUndef()) {
1861  ShiftElts = 0;
1862  Swap = true;
1863  unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
1864  if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
1865  InsertAtByte = IsLE ? 12 : 0;
1866  return true;
1867  }
1868  if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
1869  InsertAtByte = IsLE ? 8 : 4;
1870  return true;
1871  }
1872  if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
1873  InsertAtByte = IsLE ? 4 : 8;
1874  return true;
1875  }
1876  if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
1877  InsertAtByte = IsLE ? 0 : 12;
1878  return true;
1879  }
1880  }
1881 
1882  return false;
1883 }
1884 
1886  bool &Swap, bool IsLE) {
1887  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1888  // Ensure each byte index of the word is consecutive.
1889  if (!isNByteElemShuffleMask(N, 4, 1))
1890  return false;
1891 
1892  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
1893  unsigned M0 = N->getMaskElt(0) / 4;
1894  unsigned M1 = N->getMaskElt(4) / 4;
1895  unsigned M2 = N->getMaskElt(8) / 4;
1896  unsigned M3 = N->getMaskElt(12) / 4;
1897 
1898  // If both vector operands for the shuffle are the same vector, the mask will
1899  // contain only elements from the first one and the second one will be undef.
1900  if (N->getOperand(1).isUndef()) {
1901  assert(M0 < 4 && "Indexing into an undef vector?");
1902  if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
1903  return false;
1904 
1905  ShiftElts = IsLE ? (4 - M0) % 4 : M0;
1906  Swap = false;
1907  return true;
1908  }
1909 
1910  // Ensure each word index of the ShuffleVector Mask is consecutive.
1911  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
1912  return false;
1913 
1914  if (IsLE) {
1915  if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
1916  // Input vectors don't need to be swapped if the leading element
1917  // of the result is one of the 3 left elements of the second vector
1918  // (or if there is no shift to be done at all).
1919  Swap = false;
1920  ShiftElts = (8 - M0) % 8;
1921  } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
1922  // Input vectors need to be swapped if the leading element
1923  // of the result is one of the 3 left elements of the first vector
1924  // (or if we're shifting by 4 - thereby simply swapping the vectors).
1925  Swap = true;
1926  ShiftElts = (4 - M0) % 4;
1927  }
1928 
1929  return true;
1930  } else { // BE
1931  if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
1932  // Input vectors don't need to be swapped if the leading element
1933  // of the result is one of the 4 elements of the first vector.
1934  Swap = false;
1935  ShiftElts = M0;
1936  } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
1937  // Input vectors need to be swapped if the leading element
1938  // of the result is one of the 4 elements of the right vector.
1939  Swap = true;
1940  ShiftElts = M0 - 4;
1941  }
1942 
1943  return true;
1944  }
1945 }
1946 
1948  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1949 
1950  if (!isNByteElemShuffleMask(N, Width, -1))
1951  return false;
1952 
1953  for (int i = 0; i < 16; i += Width)
1954  if (N->getMaskElt(i) != i + Width - 1)
1955  return false;
1956 
1957  return true;
1958 }
1959 
1961  return isXXBRShuffleMaskHelper(N, 2);
1962 }
1963 
1965  return isXXBRShuffleMaskHelper(N, 4);
1966 }
1967 
1969  return isXXBRShuffleMaskHelper(N, 8);
1970 }
1971 
1973  return isXXBRShuffleMaskHelper(N, 16);
1974 }
1975 
1976 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
1977 /// if the inputs to the instruction should be swapped and set \p DM to the
1978 /// value for the immediate.
1979 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
1980 /// AND element 0 of the result comes from the first input (LE) or second input
1981 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
1982 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
1983 /// mask.
1985  bool &Swap, bool IsLE) {
1986  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1987 
1988  // Ensure each byte index of the double word is consecutive.
1989  if (!isNByteElemShuffleMask(N, 8, 1))
1990  return false;
1991 
1992  unsigned M0 = N->getMaskElt(0) / 8;
1993  unsigned M1 = N->getMaskElt(8) / 8;
1994  assert(((M0 | M1) < 4) && "A mask element out of bounds?");
1995 
1996  // If both vector operands for the shuffle are the same vector, the mask will
1997  // contain only elements from the first one and the second one will be undef.
1998  if (N->getOperand(1).isUndef()) {
1999  if ((M0 | M1) < 2) {
2000  DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2001  Swap = false;
2002  return true;
2003  } else
2004  return false;
2005  }
2006 
2007  if (IsLE) {
2008  if (M0 > 1 && M1 < 2) {
2009  Swap = false;
2010  } else if (M0 < 2 && M1 > 1) {
2011  M0 = (M0 + 2) % 4;
2012  M1 = (M1 + 2) % 4;
2013  Swap = true;
2014  } else
2015  return false;
2016 
2017  // Note: if control flow comes here that means Swap is already set above
2018  DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2019  return true;
2020  } else { // BE
2021  if (M0 < 2 && M1 > 1) {
2022  Swap = false;
2023  } else if (M0 > 1 && M1 < 2) {
2024  M0 = (M0 + 2) % 4;
2025  M1 = (M1 + 2) % 4;
2026  Swap = true;
2027  } else
2028  return false;
2029 
2030  // Note: if control flow comes here that means Swap is already set above
2031  DM = (M0 << 1) + (M1 & 1);
2032  return true;
2033  }
2034 }
2035 
2036 
2037 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
2038 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
2039 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
2040  SelectionDAG &DAG) {
2041  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2042  assert(isSplatShuffleMask(SVOp, EltSize));
2043  if (DAG.getDataLayout().isLittleEndian())
2044  return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2045  else
2046  return SVOp->getMaskElt(0) / EltSize;
2047 }
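// Hypothetical worked example (not from the original source): for the splat
// mask <4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7> with EltSize == 4, the immediate
// is getMaskElt(0) / 4 == 1 on a big-endian target, and (16/4) - 1 - 1 == 2
// on a little-endian target, where element numbering is reversed.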
2048 
2049 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2050 /// by using a vspltis[bhw] instruction of the specified element size, return
2051 /// the constant being splatted. The ByteSize field indicates the number of
2052 /// bytes of each element [124] -> [bhw].
2053 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2054  SDValue OpVal(nullptr, 0);
2055 
2056  // If ByteSize of the splat is bigger than the element size of the
2057  // build_vector, then we have a case where we are checking for a splat where
2058  // multiple elements of the buildvector are folded together into a single
2059  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2060  unsigned EltSize = 16/N->getNumOperands();
2061  if (EltSize < ByteSize) {
2062  unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2063  SDValue UniquedVals[4];
2064  assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2065 
2066  // See if all of the elements in the buildvector agree across the vector.
2067  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2068  if (N->getOperand(i).isUndef()) continue;
2069  // If the element isn't a constant, bail fully out.
2070  if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2071 
2072  if (!UniquedVals[i&(Multiple-1)].getNode())
2073  UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2074  else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2075  return SDValue(); // no match.
2076  }
2077 
2078  // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2079  // either constant or undef values that are identical for each chunk. See
2080  // if these chunks can form into a larger vspltis*.
2081 
2082  // Check to see if all of the leading entries are either 0 or -1. If
2083  // neither, then this won't fit into the immediate field.
2084  bool LeadingZero = true;
2085  bool LeadingOnes = true;
2086  for (unsigned i = 0; i != Multiple-1; ++i) {
2087  if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2088 
2089  LeadingZero &= isNullConstant(UniquedVals[i]);
2090  LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2091  }
2092  // Finally, check the least significant entry.
2093  if (LeadingZero) {
2094  if (!UniquedVals[Multiple-1].getNode())
2095  return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2096  int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2097  if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2098  return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2099  }
2100  if (LeadingOnes) {
2101  if (!UniquedVals[Multiple-1].getNode())
2102  return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2103  int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2104  if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2105  return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2106  }
2107 
2108  return SDValue();
2109  }
2110 
2111  // Check to see if this buildvec has a single non-undef value in its elements.
2112  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2113  if (N->getOperand(i).isUndef()) continue;
2114  if (!OpVal.getNode())
2115  OpVal = N->getOperand(i);
2116  else if (OpVal != N->getOperand(i))
2117  return SDValue();
2118  }
2119 
2120  if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2121 
2122  unsigned ValSizeInBytes = EltSize;
2123  uint64_t Value = 0;
2124  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2125  Value = CN->getZExtValue();
2126  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2127  assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2128  Value = FloatToBits(CN->getValueAPF().convertToFloat());
2129  }
2130 
2131  // If the splat value is larger than the element value, then we can never do
2132  // this splat. The only case where we could fit the replicated bits into our
2133  // immediate field would be zero, and we prefer to use vxor for that.
2134  if (ValSizeInBytes < ByteSize) return SDValue();
2135 
2136  // If the element value is larger than the splat value, check if it consists
2137  // of a repeated bit pattern of size ByteSize.
2138  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2139  return SDValue();
2140 
2141  // Properly sign extend the value.
2142  int MaskVal = SignExtend32(Value, ByteSize * 8);
2143 
2144  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2145  if (MaskVal == 0) return SDValue();
2146 
2147  // Finally, if this value fits in a 5 bit sext field, return it
2148  if (SignExtend32<5>(MaskVal) == MaskVal)
2149  return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2150  return SDValue();
2151 }
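// Illustrative sketch (not part of this file): the last two tests above shown
// on a concrete value with plain integer arithmetic.  A v8i16 element of
// 0xFEFE is a repetition of the byte 0xFE, which sign-extends to -2 and
// therefore fits the 5-bit vspltisb immediate field.
#include <cstdint>
static int asVSPLTISBImm(uint16_t Elt) {
  if ((Elt >> 8) != (Elt & 0xFF))
    return INT32_MIN;                     // not a repeated byte pattern
  int8_t Signed = (int8_t)(Elt & 0xFF);   // properly sign extend the byte
  if (Signed == 0 || Signed < -16 || Signed > 15)
    return INT32_MIN;                     // zero is handled by vxor instead
  return Signed;
}
// asVSPLTISBImm(0xFEFE) == -2, while asVSPLTISBImm(0x00FE) == INT32_MIN.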
2152 
2153 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2154 /// amount, otherwise return -1.
2155 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2156  EVT VT = N->getValueType(0);
2157  if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2158  return -1;
2159 
2160  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2161 
2162  // Find the first non-undef value in the shuffle mask.
2163  unsigned i;
2164  for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2165  /*search*/;
2166 
2167  if (i == 4) return -1; // all undef.
2168 
2169  // Otherwise, check to see if the rest of the elements are consecutively
2170  // numbered from this value.
2171  unsigned ShiftAmt = SVOp->getMaskElt(i);
2172  if (ShiftAmt < i) return -1;
2173  ShiftAmt -= i;
2174 
2175  // Check the rest of the elements to see if they are consecutive.
2176  for (++i; i != 4; ++i)
2177  if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2178  return -1;
2179 
2180  return ShiftAmt;
2181 }
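// Illustrative sketch (not from this file): a plain-array version of the
// consecutive-lane test above.  -1 denotes an undef lane, as in shuffle masks.
static int qvaligniShiftAmount(const int Mask[4]) {
  unsigned i = 0;
  while (i != 4 && Mask[i] < 0) ++i;      // find the first defined lane
  if (i == 4) return -1;                  // all lanes undef
  int Shift = Mask[i] - (int)i;
  if (Shift < 0) return -1;
  for (; i != 4; ++i)
    if (Mask[i] >= 0 && Mask[i] != Shift + (int)i)
      return -1;                          // not consecutively numbered
  return Shift;                           // e.g. {2,3,4,5} and {-1,3,4,5} -> 2
}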
2182 
2183 //===----------------------------------------------------------------------===//
2184 // Addressing Mode Selection
2185 //===----------------------------------------------------------------------===//
2186 
2187 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2188 /// or 64-bit immediate, and if the value can be accurately represented as a
2189 /// sign extension from a 16-bit value. If so, this returns true and the
2190 /// immediate.
2191 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2192  if (!isa<ConstantSDNode>(N))
2193  return false;
2194 
2195  Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2196  if (N->getValueType(0) == MVT::i32)
2197  return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2198  else
2199  return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2200 }
2201 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2202  return isIntS16Immediate(Op.getNode(), Imm);
2203 }
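// Illustrative sketch (not from this file): the property isIntS16Immediate
// checks is simply that truncating to 16 bits and sign-extending back is
// lossless.
#include <cstdint>
static bool isSignExtended16(int64_t V) { return V == (int64_t)(int16_t)V; }
// isSignExtended16(-32768) and isSignExtended16(32767) hold; 32768 does not.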
2204 
2205 /// SelectAddressRegReg - Given the specified address, check to see if it
2206 /// can be represented as an indexed [r+r] operation. Returns false if it
2207 /// can be more efficiently represented with [r+imm].
2208 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2209  SDValue &Index,
2210  SelectionDAG &DAG) const {
2211  int16_t imm = 0;
2212  if (N.getOpcode() == ISD::ADD) {
2213  if (isIntS16Immediate(N.getOperand(1), imm))
2214  return false; // r+i
2215  if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2216  return false; // r+i
2217 
2218  Base = N.getOperand(0);
2219  Index = N.getOperand(1);
2220  return true;
2221  } else if (N.getOpcode() == ISD::OR) {
2222  if (isIntS16Immediate(N.getOperand(1), imm))
2223  return false; // r+i; fold it if we can.
2224 
2225  // If this is an or of disjoint bitfields, we can codegen this as an add
2226  // (for better address arithmetic) if the LHS and RHS of the OR are provably
2227  // disjoint.
2228  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2229 
2230  if (LHSKnown.Zero.getBoolValue()) {
2231  KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2232  // If all of the bits are known zero on the LHS or RHS, the add won't
2233  // carry.
2234  if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2235  Base = N.getOperand(0);
2236  Index = N.getOperand(1);
2237  return true;
2238  }
2239  }
2240  }
2241 
2242  return false;
2243 }
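// Illustrative sketch (not part of this file): why the known-bits test above
// lets an OR be lowered as if it were an ADD.  If every bit position is known
// zero in at least one operand, the addition can never produce a carry, so
// a | b == a + b and the [r+r] form remains correct.
#include <cstdint>
static bool orBehavesLikeAdd(uint64_t KnownZeroLHS, uint64_t KnownZeroRHS) {
  // Mirrors "~(LHSKnown.Zero | RHSKnown.Zero) == 0" above: no bit may be
  // (possibly) set in both operands at once.
  return ~(KnownZeroLHS | KnownZeroRHS) == 0;
}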
2244 
2245 // If we happen to be doing an i64 load or store into a stack slot that has
2246 // less than a 4-byte alignment, then the frame-index elimination may need to
2247 // use an indexed load or store instruction (because the offset may not be a
2248 // multiple of 4). The extra register needed to hold the offset comes from the
2249 // register scavenger, and it is possible that the scavenger will need to use
2250 // an emergency spill slot. As a result, we need to make sure that a spill slot
2251 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2252 // stack slot.
2253 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2254  // FIXME: This does not handle the LWA case.
2255  if (VT != MVT::i64)
2256  return;
2257 
2258  // NOTE: We'll exclude negative FIs here, which come from argument
2259  // lowering, because there are no known test cases triggering this problem
2260  // using packed structures (or similar). We can remove this exclusion if
2261  // we find such a test case. The reason why this is so test-case driven is
2262  // because this entire 'fixup' is only to prevent crashes (from the
2263  // register scavenger) on not-really-valid inputs. For example, if we have:
2264  // %a = alloca i1
2265  // %b = bitcast i1* %a to i64*
2266  // store i64 0, i64* %b
2267  // then the store should really be marked as 'align 1', but is not. If it
2268  // were marked as 'align 1' then the indexed form would have been
2269  // instruction-selected initially, and the problem this 'fixup' is preventing
2270  // won't happen regardless.
2271  if (FrameIdx < 0)
2272  return;
2273 
2274  MachineFunction &MF = DAG.getMachineFunction();
2275  MachineFrameInfo &MFI = MF.getFrameInfo();
2276 
2277  unsigned Align = MFI.getObjectAlignment(FrameIdx);
2278  if (Align >= 4)
2279  return;
2280 
2281  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2282  FuncInfo->setHasNonRISpills();
2283 }
2284 
2285 /// Returns true if the address N can be represented by a base register plus
2286 /// a signed 16-bit displacement [r+imm], and if it is not better
2287 /// represented as reg+reg. If \p Alignment is non-zero, only accept
2288 /// displacements that are multiples of that value.
2289 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
2290  SDValue &Base,
2291  SelectionDAG &DAG,
2292  unsigned Alignment) const {
2293  // FIXME dl should come from parent load or store, not from address
2294  SDLoc dl(N);
2295  // If this can be more profitably realized as r+r, fail.
2296  if (SelectAddressRegReg(N, Disp, Base, DAG))
2297  return false;
2298 
2299  if (N.getOpcode() == ISD::ADD) {
2300  int16_t imm = 0;
2301  if (isIntS16Immediate(N.getOperand(1), imm) &&
2302  (!Alignment || (imm % Alignment) == 0)) {
2303  Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2304  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2305  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2306  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2307  } else {
2308  Base = N.getOperand(0);
2309  }
2310  return true; // [r+i]
2311  } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2312  // Match LOAD (ADD (X, Lo(G))).
2313  assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2314  && "Cannot handle constant offsets yet!");
2315  Disp = N.getOperand(1).getOperand(0); // The global address.
2316  assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2317  Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2318  Disp.getOpcode() == ISD::TargetConstantPool ||
2319  Disp.getOpcode() == ISD::TargetJumpTable);
2320  Base = N.getOperand(0);
2321  return true; // [&g+r]
2322  }
2323  } else if (N.getOpcode() == ISD::OR) {
2324  int16_t imm = 0;
2325  if (isIntS16Immediate(N.getOperand(1), imm) &&
2326  (!Alignment || (imm % Alignment) == 0)) {
2327  // If this is an or of disjoint bitfields, we can codegen this as an add
2328  // (for better address arithmetic) if the LHS and RHS of the OR are
2329  // provably disjoint.
2330  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2331 
2332  if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2333  // If all of the bits are known zero on the LHS or RHS, the add won't
2334  // carry.
2335  if (FrameIndexSDNode *FI =
2336  dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2337  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2338  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2339  } else {
2340  Base = N.getOperand(0);
2341  }
2342  Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2343  return true;
2344  }
2345  }
2346  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2347  // Loading from a constant address.
2348 
2349  // If this address fits entirely in a 16-bit sext immediate field, codegen
2350  // this as "d, 0"
2351  int16_t Imm;
2352  if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) {
2353  Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2354  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2355  CN->getValueType(0));
2356  return true;
2357  }
2358 
2359  // Handle 32-bit sext immediates with LIS + addr mode.
2360  if ((CN->getValueType(0) == MVT::i32 ||
2361  (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2362  (!Alignment || (CN->getZExtValue() % Alignment) == 0)) {
2363  int Addr = (int)CN->getZExtValue();
2364 
2365  // Otherwise, break this down into an LIS + disp.
2366  Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2367 
2368  Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2369  MVT::i32);
2370  unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2371  Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2372  return true;
2373  }
2374  }
2375 
2376  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2377  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2378  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2379  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2380  } else
2381  Base = N;
2382  return true; // [r+0]
2383 }
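// Illustrative sketch (not from this file): the LIS + displacement split used
// for constant addresses above.  The 16-bit displacement is sign extended, so
// the high half must absorb the correction; that is exactly what
// (Addr - (signed short)Addr) >> 16 computes.
#include <cstdint>
static void splitConstantAddress(uint32_t Addr, int16_t &Disp,
                                 uint32_t &HiForLIS) {
  Disp = (int16_t)(Addr & 0xFFFF);                   // low half, sign extended
  HiForLIS = (Addr - (uint32_t)(int32_t)Disp) >> 16; // value LIS materializes
  // Reassembly: (HiForLIS << 16) + Disp == Addr.  For example 0x12348765
  // splits into HiForLIS = 0x1235 and Disp = -0x789B.
}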
2384 
2385 /// SelectAddressRegRegOnly - Given the specified address, force it to be
2386 /// represented as an indexed [r+r] operation.
2387 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2388  SDValue &Index,
2389  SelectionDAG &DAG) const {
2390  // Check to see if we can easily represent this as an [r+r] address. This
2391  // will fail if it thinks that the address is more profitably represented as
2392  // reg+imm, e.g. where imm = 0.
2393  if (SelectAddressRegReg(N, Base, Index, DAG))
2394  return true;
2395 
2396  // If the address is the result of an add, we will utilize the fact that the
2397  // address calculation includes an implicit add. However, we can reduce
2398  // register pressure if we do not materialize a constant just for use as the
2399  // index register. We only get rid of the add if it is not an add of a
2400  // value and a 16-bit signed constant and both have a single use.
2401  int16_t imm = 0;
2402  if (N.getOpcode() == ISD::ADD &&
2403  (!isIntS16Immediate(N.getOperand(1), imm) ||
2404  !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2405  Base = N.getOperand(0);
2406  Index = N.getOperand(1);
2407  return true;
2408  }
2409 
2410  // Otherwise, do it the hard way, using R0 as the base register.
2411  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2412  N.getValueType());
2413  Index = N;
2414  return true;
2415 }
2416 
2417 /// Returns true if we should use a direct load into vector instruction
2418 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2419 static bool usePartialVectorLoads(SDNode *N) {
2420  if (!N->hasOneUse())
2421  return false;
2422 
2423  // If there are any other uses other than scalar to vector, then we should
2424  // keep it as a scalar load -> direct move pattern to prevent multiple
2425  // loads. Currently, only check for i64 since we have lxsd/lfd to do this
2426  // efficiently, but no update equivalent.
2427  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2428  EVT MemVT = LD->getMemoryVT();
2429  if (MemVT.isSimple() && MemVT.getSimpleVT().SimpleTy == MVT::i64) {
2430  SDNode *User = *(LD->use_begin());
2431  if (User->getOpcode() == ISD::SCALAR_TO_VECTOR)
2432  return true;
2433  }
2434  }
2435 
2436  return false;
2437 }
2438 
2439 /// getPreIndexedAddressParts - Returns true, and sets the base pointer,
2440 /// offset pointer, and addressing mode by reference, if the node's address
2441 /// can be legally represented as a pre-indexed load / store address.
2442 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2443  SDValue &Offset,
2444  ISD::MemIndexedMode &AM,
2445  SelectionDAG &DAG) const {
2446  if (DisablePPCPreinc) return false;
2447 
2448  bool isLoad = true;
2449  SDValue Ptr;
2450  EVT VT;
2451  unsigned Alignment;
2452  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2453  Ptr = LD->getBasePtr();
2454  VT = LD->getMemoryVT();
2455  Alignment = LD->getAlignment();
2456  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2457  Ptr = ST->getBasePtr();
2458  VT = ST->getMemoryVT();
2459  Alignment = ST->getAlignment();
2460  isLoad = false;
2461  } else
2462  return false;
2463 
2464  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2465  // instructions because we can fold these into a more efficient instruction
2466  // instead (such as LXSD).
2467  if (isLoad && usePartialVectorLoads(N)) {
2468  return false;
2469  }
2470 
2471  // PowerPC doesn't have preinc load/store instructions for vectors (except
2472  // for QPX, which does have preinc r+r forms).
2473  if (VT.isVector()) {
2474  if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2475  return false;
2476  } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2477  AM = ISD::PRE_INC;
2478  return true;
2479  }
2480  }
2481 
2482  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2483  // Common code will reject creating a pre-inc form if the base pointer
2484  // is a frame index, or if N is a store and the base pointer is either
2485  // the same as or a predecessor of the value being stored. Check for
2486  // those situations here, and try with swapped Base/Offset instead.
2487  bool Swap = false;
2488 
2489  if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2490  Swap = true;
2491  else if (!isLoad) {
2492  SDValue Val = cast<StoreSDNode>(N)->getValue();
2493  if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2494  Swap = true;
2495  }
2496 
2497  if (Swap)
2498  std::swap(Base, Offset);
2499 
2500  AM = ISD::PRE_INC;
2501  return true;
2502  }
2503 
2504  // LDU/STU can only handle immediates that are a multiple of 4.
2505  if (VT != MVT::i64) {
2506  if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
2507  return false;
2508  } else {
2509  // LDU/STU need an address with at least 4-byte alignment.
2510  if (Alignment < 4)
2511  return false;
2512 
2513  if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
2514  return false;
2515  }
2516 
2517  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2518  // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2519  // sext i32 to i64 when addr mode is r+i.
2520  if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2521  LD->getExtensionType() == ISD::SEXTLOAD &&
2522  isa<ConstantSDNode>(Offset))
2523  return false;
2524  }
2525 
2526  AM = ISD::PRE_INC;
2527  return true;
2528 }
2529 
2530 //===----------------------------------------------------------------------===//
2531 // LowerOperation implementation
2532 //===----------------------------------------------------------------------===//
2533 
2534 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
2535 /// and LoOpFlags to the target MO flags.
2536 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2537  unsigned &HiOpFlags, unsigned &LoOpFlags,
2538  const GlobalValue *GV = nullptr) {
2539  HiOpFlags = PPCII::MO_HA;
2540  LoOpFlags = PPCII::MO_LO;
2541 
2542  // Don't use the pic base if not in PIC relocation model.
2543  if (IsPIC) {
2544  HiOpFlags |= PPCII::MO_PIC_FLAG;
2545  LoOpFlags |= PPCII::MO_PIC_FLAG;
2546  }
2547 
2548  // If this is a reference to a global value that requires a non-lazy-ptr, make
2549  // sure that instruction lowering adds it.
2550  if (GV && Subtarget.hasLazyResolverStub(GV)) {
2551  HiOpFlags |= PPCII::MO_NLP_FLAG;
2552  LoOpFlags |= PPCII::MO_NLP_FLAG;
2553 
2554  if (GV->hasHiddenVisibility()) {
2555  HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2556  LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2557  }
2558  }
2559 }
2560 
2561 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2562  SelectionDAG &DAG) {
2563  SDLoc DL(HiPart);
2564  EVT PtrVT = HiPart.getValueType();
2565  SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2566 
2567  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2568  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2569 
2570  // With PIC, the first instruction is actually "GR+hi(&G)".
2571  if (isPIC)
2572  Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2573  DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2574 
2575  // Generate non-pic code that has direct accesses to the constant pool.
2576  // The address of the global is just (hi(&g)+lo(&g)).
2577  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2578 }
2579 
2580 static void setUsesTOCBasePtr(MachineFunction &MF) {
2581  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2582  FuncInfo->setUsesTOCBasePtr();
2583 }
2584 
2585 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2586  setUsesTOCBasePtr(DAG.getMachineFunction());
2587 }
2588 
2589 static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
2590  SDValue GA) {
2591  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2592  SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
2593  DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2594 
2595  SDValue Ops[] = { GA, Reg };
2596  return DAG.getMemIntrinsicNode(
2597  PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2600 }
2601 
2602 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2603  SelectionDAG &DAG) const {
2604  EVT PtrVT = Op.getValueType();
2605  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2606  const Constant *C = CP->getConstVal();
2607 
2608  // 64-bit SVR4 ABI code is always position-independent.
2609  // The actual address of the GlobalValue is stored in the TOC.
2610  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2611  setUsesTOCBasePtr(DAG);
2612  SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2613  return getTOCEntry(DAG, SDLoc(CP), true, GA);
2614  }
2615 
2616  unsigned MOHiFlag, MOLoFlag;
2617  bool IsPIC = isPositionIndependent();
2618  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2619 
2620  if (IsPIC && Subtarget.isSVR4ABI()) {
2621  SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2622  PPCII::MO_PIC_FLAG);
2623  return getTOCEntry(DAG, SDLoc(CP), false, GA);
2624  }
2625 
2626  SDValue CPIHi =
2627  DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2628  SDValue CPILo =
2629  DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2630  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2631 }
2632 
2633 // For 64-bit PowerPC, prefer the more compact relative encodings.
2634 // This trades 32 bits per jump table entry for one or two instructions
2635 // at the jump site.
2636 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2637  if (isJumpTableRelative())
2638  return MachineJumpTableInfo::EK_LabelDifference32;
2639 
2640  return TargetLowering::getJumpTableEncoding();
2641 }
2642 
2643 bool PPCTargetLowering::isJumpTableRelative() const {
2644  if (Subtarget.isPPC64())
2645  return true;
2646  return TargetLowering::isJumpTableRelative();
2647 }
2648 
2649 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2650  SelectionDAG &DAG) const {
2651  if (!Subtarget.isPPC64())
2652  return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2653 
2654  switch (getTargetMachine().getCodeModel()) {
2655  case CodeModel::Small:
2656  case CodeModel::Medium:
2657  return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2658  default:
2659  return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2660  getPointerTy(DAG.getDataLayout()));
2661  }
2662 }
2663 
2664 const MCExpr *
2665 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2666  unsigned JTI,
2667  MCContext &Ctx) const {
2668  if (!Subtarget.isPPC64())
2669  return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2670 
2671  switch (getTargetMachine().getCodeModel()) {
2672  case CodeModel::Small:
2673  case CodeModel::Medium:
2674  return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2675  default:
2676  return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2677  }
2678 }
2679 
2680 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2681  EVT PtrVT = Op.getValueType();
2682  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2683 
2684  // 64-bit SVR4 ABI code is always position-independent.
2685  // The actual address of the GlobalValue is stored in the TOC.
2686  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2687  setUsesTOCBasePtr(DAG);
2688  SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2689  return getTOCEntry(DAG, SDLoc(JT), true, GA);
2690  }
2691 
2692  unsigned MOHiFlag, MOLoFlag;
2693  bool IsPIC = isPositionIndependent();
2694  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2695 
2696  if (IsPIC && Subtarget.isSVR4ABI()) {
2697  SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2698  PPCII::MO_PIC_FLAG);
2699  return getTOCEntry(DAG, SDLoc(GA), false, GA);
2700  }
2701 
2702  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2703  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2704  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2705 }
2706 
2707 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2708  SelectionDAG &DAG) const {
2709  EVT PtrVT = Op.getValueType();
2710  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2711  const BlockAddress *BA = BASDN->getBlockAddress();
2712 
2713  // 64-bit SVR4 ABI code is always position-independent.
2714  // The actual BlockAddress is stored in the TOC.
2715  if (Subtarget.isSVR4ABI() &&
2716  (Subtarget.isPPC64() || isPositionIndependent())) {
2717  if (Subtarget.isPPC64())
2718  setUsesTOCBasePtr(DAG);
2719  SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2720  return getTOCEntry(DAG, SDLoc(BASDN), Subtarget.isPPC64(), GA);
2721  }
2722 
2723  unsigned MOHiFlag, MOLoFlag;
2724  bool IsPIC = isPositionIndependent();
2725  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2726  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2727  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2728  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2729 }
2730 
2731 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2732  SelectionDAG &DAG) const {
2733  // FIXME: TLS addresses currently use medium model code sequences,
2734  // which is the most useful form. Eventually support for small and
2735  // large models could be added if users need it, at the cost of
2736  // additional complexity.
2737  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2738  if (DAG.getTarget().useEmulatedTLS())
2739  return LowerToTLSEmulatedModel(GA, DAG);
2740 
2741  SDLoc dl(GA);
2742  const GlobalValue *GV = GA->getGlobal();
2743  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2744  bool is64bit = Subtarget.isPPC64();
2745  const Module *M = DAG.getMachineFunction().getFunction().getParent();
2746  PICLevel::Level picLevel = M->getPICLevel();
2747 
2748  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
2749 
2750  if (Model == TLSModel::LocalExec) {
2751  SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2752  PPCII::MO_TPREL_HA);
2753  SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2754  PPCII::MO_TPREL_LO);
2755  SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
2756  : DAG.getRegister(PPC::R2, MVT::i32);
2757 
2758  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2759  return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2760  }
2761 
2762  if (Model == TLSModel::InitialExec) {
2763  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2764  SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2765  PPCII::MO_TLS);
2766  SDValue GOTPtr;
2767  if (is64bit) {
2768  setUsesTOCBasePtr(DAG);
2769  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2770  GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2771  PtrVT, GOTReg, TGA);
2772  } else
2773  GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2774  SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2775  PtrVT, TGA, GOTPtr);
2776  return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2777  }
2778 
2779  if (Model == TLSModel::GeneralDynamic) {
2780  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2781  SDValue GOTPtr;
2782  if (is64bit) {
2783  setUsesTOCBasePtr(DAG);
2784  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2785  GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2786  GOTReg, TGA);
2787  } else {
2788  if (picLevel == PICLevel::SmallPIC)
2789  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2790  else
2791  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2792  }
2793  return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2794  GOTPtr, TGA, TGA);
2795  }
2796 
2797  if (Model == TLSModel::LocalDynamic) {
2798  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2799  SDValue GOTPtr;
2800  if (is64bit) {
2801  setUsesTOCBasePtr(DAG);
2802  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2803  GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2804  GOTReg, TGA);
2805  } else {
2806  if (picLevel == PICLevel::SmallPIC)
2807  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2808  else
2809  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2810  }
2811  SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2812  PtrVT, GOTPtr, TGA, TGA);
2813  SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2814  PtrVT, TLSAddr, TGA);
2815  return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2816  }
2817 
2818  llvm_unreachable("Unknown TLS model!");
2819 }
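// Illustrative sketch (not part of this file): how the LocalExec sequence
// above composes an address.  'TPRel' stands in for the linker-resolved
// offset of the variable from the thread pointer (r13 on 64-bit PowerPC);
// the two halves correspond to the @tprel@ha / @tprel@l relocations.
#include <cstdint>
static uint64_t localExecAddress(uint64_t ThreadPtr, int32_t TPRel) {
  int64_t Hi = ((int64_t)TPRel + 0x8000) >> 16;    // high-adjusted half
  int64_t Lo = (int16_t)TPRel;                     // sign-extended low half
  return ThreadPtr + (uint64_t)(Hi * 65536 + Lo);  // equals ThreadPtr + TPRel
}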
2820 
2821 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2822  SelectionDAG &DAG) const {
2823  EVT PtrVT = Op.getValueType();
2824  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2825  SDLoc DL(GSDN);
2826  const GlobalValue *GV = GSDN->getGlobal();
2827 
2828  // 64-bit SVR4 ABI code is always position-independent.
2829  // The actual address of the GlobalValue is stored in the TOC.
2830  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2831  setUsesTOCBasePtr(DAG);
2832  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2833  return getTOCEntry(DAG, DL, true, GA);
2834  }
2835 
2836  unsigned MOHiFlag, MOLoFlag;
2837  bool IsPIC = isPositionIndependent();
2838  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
2839 
2840  if (IsPIC && Subtarget.isSVR4ABI()) {
2841  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2842  GSDN->getOffset(),
2843  PPCII::MO_PIC_FLAG);
2844  return getTOCEntry(DAG, DL, false, GA);
2845  }
2846 
2847  SDValue GAHi =
2848  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2849  SDValue GALo =
2850  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2851 
2852  SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
2853 
2854  // If the global reference is actually to a non-lazy-pointer, we have to do an
2855  // extra load to get the address of the global.
2856  if (MOHiFlag & PPCII::MO_NLP_FLAG)
2857  Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2858  return Ptr;
2859 }
2860 
2861 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2862  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2863  SDLoc dl(Op);
2864 
2865  if (Op.getValueType() == MVT::v2i64) {
2866  // When the operands themselves are v2i64 values, we need to do something
2867  // special because VSX has no underlying comparison operations for these.
2868  if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2869  // Equality can be handled by casting to the legal type for Altivec
2870  // comparisons, everything else needs to be expanded.
2871  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2872  return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2873  DAG.getSetCC(dl, MVT::v4i32,
2874  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2875  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2876  CC));
2877  }
2878 
2879  return SDValue();
2880  }
2881 
2882  // We handle most of these in the usual way.
2883  return Op;
2884  }
2885 
2886  // If we're comparing for equality to zero, expose the fact that this is
2887  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2888  // fold the new nodes.
2889  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
2890  return V;
2891 
2892  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2893  // Leave comparisons against 0 and -1 alone for now, since they're usually
2894  // optimized. FIXME: revisit this when we can custom lower all setcc
2895  // optimizations.
2896  if (C->isAllOnesValue() || C->isNullValue())
2897  return SDValue();
2898  }
2899 
2900  // If we have an integer seteq/setne, turn it into a compare against zero
2901  // by xor'ing the rhs with the lhs, which is faster than setting a
2902  // condition register, reading it back out, and masking the correct bit. The
2903  // normal approach here uses sub to do this instead of xor. Using xor exposes
2904  // the result to other bit-twiddling opportunities.
2905  EVT LHSVT = Op.getOperand(0).getValueType();
2906  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2907  EVT VT = Op.getValueType();
2908  SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
2909  Op.getOperand(1));
2910  return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
2911  }
2912  return SDValue();
2913 }
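// Illustrative sketch (not from this file): the integer seteq/setne rewrite
// performed above.  Comparing (a ^ b) against zero gives the same truth value
// as a == b, avoids moving a condition-register bit into a GPR, and leaves an
// xor that later combines can fold further.
#include <cstdint>
static bool equalViaXor(uint32_t A, uint32_t B) { return (A ^ B) == 0; }
// equalViaXor(5, 5) is true; equalViaXor(5, 7) is false.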
2914 
2915 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
2916  SDNode *Node = Op.getNode();
2917  EVT VT = Node->getValueType(0);
2918  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2919  SDValue InChain = Node->getOperand(0);
2920  SDValue VAListPtr = Node->getOperand(1);
2921  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2922  SDLoc dl(Node);
2923 
2924  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
2925 
2926  // gpr_index
2927  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2928  VAListPtr, MachinePointerInfo(SV), MVT::i8);
2929  InChain = GprIndex.getValue(1);
2930 
2931  if (VT == MVT::i64) {
2932  // Check whether GprIndex is odd (i64 arguments must start at an even index)
2933  SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
2934  DAG.getConstant(1, dl, MVT::i32));
2935  SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
2936  DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
2937  SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
2938  DAG.getConstant(1, dl, MVT::i32));
2939  // Align GprIndex to be even if it isn't
2940  GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
2941  GprIndex);
2942  }
2943 
2944  // fpr index is 1 byte after gpr
2945  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2946  DAG.getConstant(1, dl, MVT::i32));
2947 
2948  // fpr
2949  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2950  FprPtr, MachinePointerInfo(SV), MVT::i8);
2951  InChain = FprIndex.getValue(1);
2952 
2953  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2954  DAG.getConstant(8, dl, MVT::i32));
2955 
2956  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2957  DAG.getConstant(4, dl, MVT::i32));
2958 
2959  // areas
2960  SDValue OverflowArea =
2961  DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
2962  InChain = OverflowArea.getValue(1);
2963 
2964  SDValue RegSaveArea =
2965  DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
2966  InChain = RegSaveArea.getValue(1);
2967 
2968  // select overflow_area if index >= 8
2969  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
2970  DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
2971 
2972  // adjustment constant gpr_index * 4/8
2973  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
2974  VT.isInteger() ? GprIndex : FprIndex,
2975  DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
2976  MVT::i32));
2977 
2978  // OurReg = RegSaveArea + RegConstant
2979  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
2980  RegConstant);
2981 
2982  // Floating types are 32 bytes into RegSaveArea
2983  if (VT.isFloatingPoint())
2984  OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
2985  DAG.getConstant(32, dl, MVT::i32));
2986 
2987  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
2988  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
2989  VT.isInteger() ? GprIndex : FprIndex,
2990  DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
2991  MVT::i32));
2992 
2993  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
2994  VT.isInteger() ? VAListPtr : FprPtr,
2995  MachinePointerInfo(SV), MVT::i8);
2996 
2997  // determine if we should load from reg_save_area or overflow_area
2998  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
2999 
3000  // increase overflow_area by 4/8 if gpr/fpr >= 8
3001  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3002  DAG.getConstant(VT.isInteger() ? 4 : 8,
3003  dl, MVT::i32));
3004 
3005  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3006  OverflowAreaPlusN);
3007 
3008  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3009  MachinePointerInfo(), MVT::i32);
3010 
3011  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3012 }
3013 
3014 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3015  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3016 
3017  // We have to copy the entire va_list struct:
3018  // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
3019  return DAG.getMemcpy(Op.getOperand(0), Op,
3020  Op.getOperand(1), Op.getOperand(2),
3021  DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
3022  false, MachinePointerInfo(), MachinePointerInfo());
3023 }
3024 
3025 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3026  SelectionDAG &DAG) const {
3027  return Op.getOperand(0);
3028 }
3029 
3030 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3031  SelectionDAG &DAG) const {
3032  SDValue Chain = Op.getOperand(0);
3033  SDValue Trmp = Op.getOperand(1); // trampoline
3034  SDValue FPtr = Op.getOperand(2); // nested function
3035  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3036  SDLoc dl(Op);
3037 
3038  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3039  bool isPPC64 = (PtrVT == MVT::i64);
3040  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3041 
3042  TargetLowering::ArgListTy Args;
3043  TargetLowering::ArgListEntry Entry;
3044 
3045  Entry.Ty = IntPtrTy;
3046  Entry.Node = Trmp; Args.push_back(Entry);
3047 
3048  // TrampSize == (isPPC64 ? 48 : 40);
3049  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3050  isPPC64 ? MVT::i64 : MVT::i32);
3051  Args.push_back(Entry);
3052 
3053  Entry.Node = FPtr; Args.push_back(Entry);
3054  Entry.Node = Nest; Args.push_back(Entry);
3055 
3056  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3057  TargetLowering::CallLoweringInfo CLI(DAG);
3058  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3059  CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3060  DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3061 
3062  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3063  return CallResult.second;
3064 }
3065 
3066 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3067  MachineFunction &MF = DAG.getMachineFunction();
3068  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3069  EVT PtrVT = getPointerTy(MF.getDataLayout());
3070 
3071  SDLoc dl(Op);
3072 
3073  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
3074  // vastart just stores the address of the VarArgsFrameIndex slot into the
3075  // memory location argument.
3076  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3077  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3078  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3079  MachinePointerInfo(SV));
3080  }
3081 
3082  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3083  // We suppose the given va_list is already allocated.
3084  //
3085  // typedef struct {
3086  // char gpr; /* index into the array of 8 GPRs
3087  // * stored in the register save area
3088  // * gpr=0 corresponds to r3,
3089  // * gpr=1 to r4, etc.
3090  // */
3091  // char fpr; /* index into the array of 8 FPRs
3092  // * stored in the register save area
3093  // * fpr=0 corresponds to f1,
3094  // * fpr=1 to f2, etc.
3095  // */
3096  // char *overflow_arg_area;
3097  // /* location on stack that holds
3098  // * the next overflow argument
3099  // */
3100  // char *reg_save_area;
3101  // /* where r3:r10 and f1:f8 (if saved)
3102  // * are stored
3103  // */
3104  // } va_list[1];
3105 
3106  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3107  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3108  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3109  PtrVT);
3110  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3111  PtrVT);
3112 
3113  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3114  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3115 
3116  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3117  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3118 
3119  uint64_t FPROffset = 1;
3120  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3121 
3122  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3123 
3124  // Store first byte : number of int regs
3125  SDValue firstStore =
3126  DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3127  MachinePointerInfo(SV), MVT::i8);
3128  uint64_t nextOffset = FPROffset;
3129  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3130  ConstFPROffset);
3131 
3132  // Store second byte : number of float regs
3133  SDValue secondStore =
3134  DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3135  MachinePointerInfo(SV, nextOffset), MVT::i8);
3136  nextOffset += StackOffset;
3137  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3138 
3139  // Store second word : arguments given on stack
3140  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3141  MachinePointerInfo(SV, nextOffset));
3142  nextOffset += FrameOffset;
3143  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3144 
3145  // Store third word : arguments given in registers
3146  return DAG.getStore(thirdStore, dl, FR, nextPtr,
3147  MachinePointerInfo(SV, nextOffset));
3148 }
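// Illustrative sketch (not part of this file): the 32-bit SVR4 va_list object
// that LowerVASTART fills in above.  On the 32-bit target, pointers are 4
// bytes, giving the offsets the code uses (0 for gpr, 1 for fpr, 4 for
// overflow_arg_area, 8 for reg_save_area) and the 12-byte total that
// LowerVACOPY copies.
struct PPC32SVR4VAList {
  unsigned char gpr;        // index of the next GPR (gpr=0 is r3, gpr=1 is r4, ...)
  unsigned char fpr;        // index of the next FPR (fpr=0 is f1, fpr=1 is f2, ...)
  unsigned char pad[2];     // alignment padding before the pointer members
  char *overflow_arg_area;  // next argument that was passed on the stack
  char *reg_save_area;      // where r3:r10 and f1:f8 (if saved) were spilled
};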
3149 
3150 #include "PPCGenCallingConv.inc"
3151 
3152 // Function whose sole purpose is to kill compiler warnings
3153 // stemming from unused functions included from PPCGenCallingConv.inc.
3154 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
3155  return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
3156 }
3157 
3158 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
3159  CCValAssign::LocInfo &LocInfo,
3160  ISD::ArgFlagsTy &ArgFlags,
3161  CCState &State) {
3162  return true;
3163 }
3164 
3165 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
3166  MVT &LocVT,
3167  CCValAssign::LocInfo &LocInfo,
3168  ISD::ArgFlagsTy &ArgFlags,
3169  CCState &State) {
3170  static const MCPhysReg ArgRegs[] = {
3171  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3172  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3173  };
3174  const unsigned NumArgRegs = array_lengthof(ArgRegs);
3175 
3176  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
3177 
3178  // Skip one register if the first unallocated register has an even register
3179  // number and there are still argument registers available which have not been
3180  // allocated yet. RegNum is actually an index into ArgRegs, which means we
3181  // need to skip a register if RegNum is odd.
3182  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
3183  State.AllocateReg(ArgRegs[RegNum]);
3184  }
3185 
3186  // Always return false here, as this function only makes sure that the first
3187  // unallocated register has an odd register number and does not actually
3188  // allocate a register for the current argument.
3189  return false;
3190 }
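// Illustrative sketch (not from this file): the pairing rule the custom
// handler above enforces.  64-bit arguments occupy aligned GPR pairs
// (r3:r4, r5:r6, r7:r8, r9:r10), so when the next free slot has an odd index
// into ArgRegs it is burned first and the value starts on the next pair.
static unsigned alignToGPRPair(unsigned NextFreeIdx, unsigned NumArgRegs) {
  if (NextFreeIdx != NumArgRegs && (NextFreeIdx % 2) == 1)
    ++NextFreeIdx;                 // e.g. r4 (index 1) is skipped, use r5:r6
  return NextFreeIdx;
}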
3191 
3192 bool
3193 llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
3194  MVT &LocVT,
3195  CCValAssign::LocInfo &LocInfo,
3196  ISD::ArgFlagsTy &ArgFlags,
3197  CCState &State) {
3198  static const MCPhysReg ArgRegs[] = {
3199  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3200  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3201  };
3202  const unsigned NumArgRegs = array_lengthof(ArgRegs);
3203 
3204  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
3205  int RegsLeft = NumArgRegs - RegNum;
3206 
3207  // Skip if there are not enough registers left for the long double type (4 GPRs
3208  // in soft-float mode) and put the long double argument on the stack.
3209  if (RegNum != NumArgRegs && RegsLeft < 4) {
3210  for (int i = 0; i < RegsLeft; i++) {
3211  State.AllocateReg(ArgRegs[RegNum + i]);
3212  }
3213  }
3214 
3215  return false;
3216 }
3217 
3218 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
3219  MVT &LocVT,
3220  CCValAssign::LocInfo &LocInfo,
3221  ISD::ArgFlagsTy &ArgFlags,
3222  CCState &State) {
3223  static const MCPhysReg ArgRegs[] = {
3224  PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3225  PPC::F8
3226  };
3227 
3228  const unsigned NumArgRegs = array_lengthof(ArgRegs);
3229 
3230  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
3231 
3232  // If there is only one floating-point register left, we need to put both f64
3233  // values of a split ppc_fp128 value on the stack.
3234  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
3235  State.AllocateReg(ArgRegs[RegNum]);
3236  }
3237 
3238  // Always return false here, as this function only makes sure that the two f64
3239  // values a ppc_fp128 value is split into are both passed in registers or both
3240  // passed on the stack and does not actually allocate a register for the
3241  // current argument.
3242  return false;
3243 }
3244 
3245 /// FPR - The set of FP registers that should be allocated for arguments
3246 /// on Darwin.
3247 static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3248  PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3249  PPC::F11, PPC::F12, PPC::F13};
3250 
3251 /// QFPR - The set of QPX registers that should be allocated for arguments.
3252 static const MCPhysReg QFPR[] = {
3253  PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
3254  PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
3255 
3256 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3257 /// the stack.
3258 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3259  unsigned PtrByteSize) {
3260  unsigned ArgSize = ArgVT.getStoreSize();
3261  if (Flags.isByVal())
3262  ArgSize = Flags.getByValSize();
3263 
3264  // Round up to multiples of the pointer size, except for array members,
3265  // which are always packed.
3266  if (!Flags.isInConsecutiveRegs())
3267  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3268 
3269  return ArgSize;
3270 }
3271 
3272 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3273 /// on the stack.
3274 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3275  ISD::ArgFlagsTy Flags,
3276  unsigned PtrByteSize) {
3277  unsigned Align = PtrByteSize;
3278 
3279  // Altivec parameters are padded to a 16 byte boundary.
3280  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3281  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3282  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3283  ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3284  Align = 16;
3285  // QPX vector types stored in double-precision are padded to a 32 byte
3286  // boundary.
3287  else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
3288  Align = 32;
3289 
3290  // ByVal parameters are aligned as requested.
3291  if (Flags.isByVal()) {
3292  unsigned BVAlign = Flags.getByValAlign();
3293  if (BVAlign > PtrByteSize) {
3294  if (BVAlign % PtrByteSize != 0)
3295  report_fatal_error(
3296  "ByVal alignment is not a multiple of the pointer size");
3297 
3298  Align = BVAlign;
3299  }
3300  }
3301 
3302  // Array members are always packed to their original alignment.
3303  if (Flags.isInConsecutiveRegs()) {
3304  // If the array member was split into multiple registers, the first
3305  // needs to be aligned to the size of the full type. (Except for
3306  // ppcf128, which is only aligned as its f64 components.)
3307  if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3308  Align = OrigVT.getStoreSize();
3309  else
3310  Align = ArgVT.getStoreSize();
3311  }
3312 
3313  return Align;
3314 }
3315 
3316 /// CalculateStackSlotUsed - Return whether this argument will use its
3317 /// stack slot (instead of being passed in registers). ArgOffset,
3318 /// AvailableFPRs, and AvailableVRs must hold the current argument
3319 /// position, and will be updated to account for this argument.
3320 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
3321  ISD::ArgFlagsTy Flags,
3322  unsigned PtrByteSize,
3323  unsigned LinkageSize,
3324  unsigned ParamAreaSize,
3325  unsigned &ArgOffset,
3326  unsigned &AvailableFPRs,
3327  unsigned &AvailableVRs, bool HasQPX) {
3328  bool UseMemory = false;
3329 
3330  // Respect alignment of argument on the stack.
3331  unsigned Align =
3332  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3333  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3334  // If there's no space left in the argument save area, we must
3335  // use memory (this check also catches zero-sized arguments).
3336  if (ArgOffset >= LinkageSize + ParamAreaSize)
3337  UseMemory = true;
3338 
3339  // Allocate argument on the stack.
3340  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3341  if (Flags.isInConsecutiveRegsLast())
3342  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3343  // If we overran the argument save area, we must use memory
3344  // (this check catches arguments passed partially in memory)
3345  if (ArgOffset > LinkageSize + ParamAreaSize)
3346  UseMemory = true;
3347 
3348  // However, if the argument is actually passed in an FPR or a VR,
3349  // we don't use memory after all.
3350  if (!Flags.isByVal()) {
3351  if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
3352  // QPX registers overlap with the scalar FP registers.
3353  (HasQPX && (ArgVT == MVT::v4f32 ||
3354  ArgVT == MVT::v4f64 ||
3355  ArgVT == MVT::v4i1)))
3356  if (AvailableFPRs > 0) {
3357  --AvailableFPRs;
3358  return false;
3359  }
3360  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3361  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3362  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3363  ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3364  if (AvailableVRs > 0) {
3365  --AvailableVRs;
3366  return false;
3367  }
3368  }
3369 
3370  return UseMemory;
3371 }
3372 
3373 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3374 /// ensure minimum alignment required for target.
3375 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3376  unsigned NumBytes) {
3377  unsigned TargetAlign = Lowering->getStackAlignment();
3378  unsigned AlignMask = TargetAlign - 1;
3379  NumBytes = (NumBytes + AlignMask) & ~AlignMask;
3380  return NumBytes;
3381 }
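// Illustrative sketch (not part of this file): the round-up identity used by
// EnsureStackAlignment above.  For a power-of-two alignment A, the expression
// (N + A - 1) & ~(A - 1) is the smallest multiple of A that is >= N.
static unsigned roundUpToAlignment(unsigned N, unsigned A) {
  unsigned AlignMask = A - 1;      // A is assumed to be a power of two
  return (N + AlignMask) & ~AlignMask;
}
// roundUpToAlignment(100, 16) == 112 and roundUpToAlignment(112, 16) == 112.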
3382 
3383 SDValue PPCTargetLowering::LowerFormalArguments(
3384  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3385  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3386  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3387  if (Subtarget.isSVR4ABI()) {
3388  if (Subtarget.isPPC64())
3389  return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
3390  dl, DAG, InVals);
3391  else
3392  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
3393  dl, DAG, InVals);
3394  } else {
3395  return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
3396  dl, DAG, InVals);
3397  }
3398 }
3399 
3400 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3401  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3402  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3403  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3404 
3405  // 32-bit SVR4 ABI Stack Frame Layout:
3406  // +-----------------------------------+
3407  // +--> | Back chain |
3408  // | +-----------------------------------+
3409  // | | Floating-point register save area |
3410  // | +-----------------------------------+
3411  // | | General register save area |
3412  // | +-----------------------------------+
3413  // | | CR save word |
3414  // | +-----------------------------------+
3415  // | | VRSAVE save word |
3416  // | +-----------------------------------+
3417  // | | Alignment padding |
3418  // | +-----------------------------------+
3419  // | | Vector register save area |
3420  // | +-----------------------------------+
3421  // | | Local variable space |
3422  // | +-----------------------------------+
3423  // | | Parameter list area |
3424  // | +-----------------------------------+
3425  // | | LR save word |
3426  // | +-----------------------------------+
3427  // SP--> +--- | Back chain |
3428  // +-----------------------------------+
3429  //
3430  // Specifications:
3431  // System V Application Binary Interface PowerPC Processor Supplement
3432  // AltiVec Technology Programming Interface Manual
3433 
3434  MachineFunction &MF = DAG.getMachineFunction();
3435  MachineFrameInfo &MFI = MF.getFrameInfo();
3436  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3437 
3438  EVT PtrVT = getPointerTy(MF.getDataLayout());
3439  // Potential tail calls could cause overwriting of argument stack slots.
3440  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3441  (CallConv == CallingConv::Fast));
3442  unsigned PtrByteSize = 4;
3443 
3444  // Assign locations to all of the incoming arguments.
3445  SmallVector<CCValAssign, 16> ArgLocs;
3446  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3447  *DAG.getContext());
3448 
3449  // Reserve space for the linkage area on the stack.
3450  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3451  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
3452  if (useSoftFloat() || hasSPE())
3453  CCInfo.PreAnalyzeFormalArguments(Ins);
3454 
3455  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3456  CCInfo.clearWasPPCF128();
3457 
3458  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3459  CCValAssign &VA = ArgLocs[i];
3460 
3461  // Arguments stored in registers.
3462  if (VA.isRegLoc()) {
3463  const TargetRegisterClass *RC;
3464  EVT ValVT = VA.getValVT();
3465 
3466  switch (ValVT.getSimpleVT().SimpleTy) {
3467  default:
3468  llvm_unreachable("ValVT not supported by formal arguments Lowering");
3469  case MVT::i1:
3470  case MVT::i32:
3471  RC = &PPC::GPRCRegClass;
3472  break;
3473  case MVT::f32:
3474  if (Subtarget.hasP8Vector())
3475  RC = &PPC::VSSRCRegClass;
3476  else if (Subtarget.hasSPE())
3477  RC = &PPC::SPE4RCRegClass;
3478  else
3479  RC = &PPC::F4RCRegClass;
3480  break;
3481  case MVT::f64:
3482  if (Subtarget.hasVSX())
3483  RC = &PPC::VSFRCRegClass;
3484  else if (Subtarget.hasSPE())
3485  RC = &PPC::SPERCRegClass;
3486  else
3487  RC = &PPC::F8RCRegClass;
3488  break;
3489  case MVT::v16i8:
3490  case MVT::v8i16:
3491  case MVT::v4i32:
3492  RC = &PPC::VRRCRegClass;
3493  break;
3494  case MVT::v4f32:
3495  RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
3496  break;
3497  case MVT::v2f64:
3498  case MVT::v2i64:
3499  RC = &PPC::VRRCRegClass;
3500  break;
3501  case MVT::v4f64:
3502  RC = &PPC::QFRCRegClass;
3503  break;
3504  case MVT::v4i1:
3505  RC = &PPC::QBRCRegClass;
3506  break;
3507  }
3508 
3509  // Transform the arguments stored in physical registers into virtual ones.
3510  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3511  SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3512  ValVT == MVT::i1 ? MVT::i32 : ValVT);
3513 
3514  if (ValVT == MVT::i1)
3515  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3516 
3517  InVals.push_back(ArgValue);
3518  } else {
3519  // Argument stored in memory.
3520  assert(VA.isMemLoc());
3521 
3522  // Get the extended size of the argument type on the stack
3523  unsigned ArgSize = VA.getLocVT().getStoreSize();
3524  // Get the actual size of the argument type
3525  unsigned ObjSize = VA.getValVT().getStoreSize();
3526  unsigned ArgOffset = VA.getLocMemOffset();
3527  // Stack objects in PPC32 are right justified.
3528  ArgOffset += ArgSize - ObjSize;
3529  int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3530 
3531  // Create load nodes to retrieve arguments from the stack.
3532  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3533  InVals.push_back(
3534  DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3535  }
3536  }
3537 
3538  // Assign locations to all of the incoming aggregate by value arguments.
3539  // Aggregates passed by value are stored in the local variable space of the
3540  // caller's stack frame, right above the parameter list area.
3541  SmallVector<CCValAssign, 16> ByValArgLocs;
3542  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3543  ByValArgLocs, *DAG.getContext());
3544 
3545  // Reserve stack space for the allocations in CCInfo.
3546  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3547 
3548  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3549 
3550  // Area that is at least reserved in the caller of this function.
3551  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3552  MinReservedArea = std::max(MinReservedArea, LinkageSize);
3553 
3554  // Set the size that is at least reserved in the caller of this function. Tail
3555  // call optimized function's reserved stack space needs to be aligned so that
3556  // taking the difference between two stack areas will result in an aligned
3557  // stack.
3558  MinReservedArea =
3559  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3560  FuncInfo->setMinReservedArea(MinReservedArea);
3561 
3562  SmallVector<SDValue, 8> MemOps;
3563 
3564  // If the function takes a variable number of arguments, make a frame index for
3565  // the start of the first vararg value... for expansion of llvm.va_start.
3566  if (isVarArg) {
3567  static const MCPhysReg GPArgRegs[] = {
3568  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3569  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3570  };
3571  const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3572 
3573  static const MCPhysReg FPArgRegs[] = {
3574  PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3575  PPC::F8
3576  };
3577  unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3578 
3579  if (useSoftFloat() || hasSPE())
3580  NumFPArgRegs = 0;
3581 
3582  FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3583  FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3584 
3585  // Make room for NumGPArgRegs and NumFPArgRegs.
3586  int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3587  NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3588 
3589  FuncInfo->setVarArgsStackOffset(
3590  MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3591  CCInfo.getNextStackOffset(), true));
3592 
3593  FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
3594  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3595 
3596  // The fixed integer arguments of a variadic function are stored to the
3597  // VarArgsFrameIndex on the stack so that they may be loaded by
3598  // dereferencing the result of va_arg.
3599  for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3600  // Get an existing live-in vreg, or add a new one.
3601  unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3602  if (!VReg)
3603  VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3604 
3605  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3606  SDValue Store =
3607  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3608  MemOps.push_back(Store);
3609  // Increment the address by four for the next argument to store
3610  SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3611  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3612  }
3613 
3614  // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3615  // is set.
3616  // The double arguments are stored to the VarArgsFrameIndex
3617  // on the stack.
3618  for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3619  // Get an existing live-in vreg, or add a new one.
3620  unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3621  if (!VReg)
3622  VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3623 
3624  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3625  SDValue Store =
3626  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3627  MemOps.push_back(Store);
3628  // Increment the address by eight for the next argument to store
3629  SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3630  PtrVT);
3631  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3632  }
3633  }
3634 
3635  if (!MemOps.empty())
3636  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3637 
3638  return Chain;
3639 }
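// Illustrative sketch (not part of the original source): for a 32-bit SVR4
// variadic callee such as
//   int sum(int n, ...) { ... }
// the code above spills the unnamed GPR arguments (r3-r10) and, unless
// soft-float or SPE is in effect, the FPR arguments (f1-f8) into the
// VarArgsFrameIndex object, so a later va_arg expansion can fetch them with
// plain loads from that area.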
3640 
3641 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3642 // value to MVT::i64 and then truncate to the correct register size.
3643 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3644  EVT ObjectVT, SelectionDAG &DAG,
3645  SDValue ArgVal,
3646  const SDLoc &dl) const {
3647  if (Flags.isSExt())
3648  ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3649  DAG.getValueType(ObjectVT));
3650  else if (Flags.isZExt())
3651  ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3652  DAG.getValueType(ObjectVT));
3653 
3654  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3655 }
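// Sketch of the node pattern produced above for a sign-extended i32 argument
// arriving in a 64-bit GPR (value names are purely illustrative):
//   v1 = CopyFromReg ...         ; whole i64 register
//   v2 = AssertSext v1, i32      ; upper 32 bits are known sign bits
//   v3 = truncate v2 to i32      ; value pushed into InVals by the callers
//                                ; of this helper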
3656 
3657 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3658  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3659  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3660  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3661  // TODO: add description of PPC stack frame format, or at least some docs.
3662  //
3663  bool isELFv2ABI = Subtarget.isELFv2ABI();
3664  bool isLittleEndian = Subtarget.isLittleEndian();
3665  MachineFunction &MF = DAG.getMachineFunction();
3666  MachineFrameInfo &MFI = MF.getFrameInfo();
3667  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3668 
3669  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3670  "fastcc not supported on varargs functions");
3671 
3672  EVT PtrVT = getPointerTy(MF.getDataLayout());
3673  // Potential tail calls could cause overwriting of argument stack slots.
3674  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3675  (CallConv == CallingConv::Fast));
3676  unsigned PtrByteSize = 8;
3677  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3678 
3679  static const MCPhysReg GPR[] = {
3680  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3681  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3682  };
3683  static const MCPhysReg VR[] = {
3684  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3685  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3686  };
3687 
3688  const unsigned Num_GPR_Regs = array_lengthof(GPR);
3689  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3690  const unsigned Num_VR_Regs = array_lengthof(VR);
3691  const unsigned Num_QFPR_Regs = Num_FPR_Regs;
3692 
3693  // Do a first pass over the arguments to determine whether the ABI
3694  // guarantees that our caller has allocated the parameter save area
3695  // on its stack frame. In the ELFv1 ABI, this is always the case;
3696  // in the ELFv2 ABI, it is true if this is a vararg function or if
3697  // any parameter is located in a stack slot.
3698 
3699  bool HasParameterArea = !isELFv2ABI || isVarArg;
3700  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3701  unsigned NumBytes = LinkageSize;
3702  unsigned AvailableFPRs = Num_FPR_Regs;
3703  unsigned AvailableVRs = Num_VR_Regs;
3704  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3705  if (Ins[i].Flags.isNest())
3706  continue;
3707 
3708  if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3709  PtrByteSize, LinkageSize, ParamAreaSize,
3710  NumBytes, AvailableFPRs, AvailableVRs,
3711  Subtarget.hasQPX()))
3712  HasParameterArea = true;
3713  }
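// Example of the rule computed above (a sketch): for
//   long f(long a, long b);
// under ELFv2 both arguments travel in r3/r4, nothing needs a stack slot, and
// HasParameterArea stays false, so the callee may rely only on the linkage
// area; a vararg signature or a ninth integer argument flips it back to true.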
3714 
3715  // Add DAG nodes to load the arguments or copy them out of registers. On
3716  // entry to a function on PPC, the arguments start after the linkage area,
3717  // although the first ones are often in registers.
3718 
3719  unsigned ArgOffset = LinkageSize;
3720  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3721  unsigned &QFPR_idx = FPR_idx;
3722  SmallVector<SDValue, 8> MemOps;
3723  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
3724  unsigned CurArgIdx = 0;
3725  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3726  SDValue ArgVal;
3727  bool needsLoad = false;
3728  EVT ObjectVT = Ins[ArgNo].VT;
3729  EVT OrigVT = Ins[ArgNo].ArgVT;
3730  unsigned ObjSize = ObjectVT.getStoreSize();
3731  unsigned ArgSize = ObjSize;
3732  ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3733  if (Ins[ArgNo].isOrigArg()) {
3734  std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3735  CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3736  }
3737  // We re-align the argument offset for each argument, except when using the
3738  // fast calling convention, when we need to make sure we do that only when
3739  // we'll actually use a stack slot.
3740  unsigned CurArgOffset, Align;
3741  auto ComputeArgOffset = [&]() {
3742  /* Respect alignment of argument on the stack. */
3743  Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3744  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3745  CurArgOffset = ArgOffset;
3746  };
3747 
3748  if (CallConv != CallingConv::Fast) {
3749  ComputeArgOffset();
3750 
3751  /* Compute GPR index associated with argument offset. */
3752  GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3753  GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3754  }
3755 
3756  // FIXME the codegen can be much improved in some cases.
3757  // We do not have to keep everything in memory.
3758  if (Flags.isByVal()) {
3759  assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3760 
3761  if (CallConv == CallingConv::Fast)
3762  ComputeArgOffset();
3763 
3764  // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
3765  ObjSize = Flags.getByValSize();
3766  ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3767  // Empty aggregate parameters do not take up registers. Examples:
3768  // struct { } a;
3769  // union { } b;
3770  // int c[0];
3771  // etc. However, we have to provide a place-holder in InVals, so
3772  // pretend we have an 8-byte item at the current address for that
3773  // purpose.
3774  if (!ObjSize) {
3775  int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
3776  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3777  InVals.push_back(FIN);
3778  continue;
3779  }
3780 
3781  // Create a stack object covering all stack doublewords occupied
3782  // by the argument. If the argument is (fully or partially) on
3783  // the stack, or if the argument is fully in registers but the
3784  // caller has allocated the parameter save anyway, we can refer
3785  // directly to the caller's stack frame. Otherwise, create a
3786  // local copy in our own frame.
3787  int FI;
3788  if (HasParameterArea ||
3789  ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
3790  FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
3791  else
3792  FI = MFI.CreateStackObject(ArgSize, Align, false);
3793  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3794 
3795  // Handle aggregates smaller than 8 bytes.
3796  if (ObjSize < PtrByteSize) {
3797  // The value of the object is its address, which differs from the
3798  // address of the enclosing doubleword on big-endian systems.
3799  SDValue Arg = FIN;
3800  if (!isLittleEndian) {
3801  SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
3802  Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
3803  }
3804  InVals.push_back(Arg);
3805 
3806  if (GPR_idx != Num_GPR_Regs) {
3807  unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3808  FuncInfo->addLiveInAttr(VReg, Flags);
3809  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3810  SDValue Store;
3811 
3812  if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
3813  EVT ObjType = (ObjSize == 1 ? MVT::i8 :
3814  (ObjSize == 2 ? MVT::i16 : MVT::i32));
3815  Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
3816  MachinePointerInfo(&*FuncArg), ObjType);
3817  } else {
3818  // For sizes that don't fit a truncating store (3, 5, 6, 7),
3819  // store the whole register as-is to the parameter save area
3820  // slot.
3821  Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3822  MachinePointerInfo(&*FuncArg));
3823  }
3824 
3825  MemOps.push_back(Store);
3826  }
3827  // Whether we copied from a register or not, advance the offset
3828  // into the parameter save area by a full doubleword.
3829  ArgOffset += PtrByteSize;
3830  continue;
3831  }
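// Illustrative sketch of the small-byval path above: a 3-byte struct passed
// by value on big-endian targets lives in the high-order bytes of its
// doubleword, so the address pushed into InVals is FIN + (8 - 3), and since
// 3 bytes do not fit a truncating store, the full 8-byte register is written
// to the slot instead.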
3832 
3833  // The value of the object is its address, which is the address of
3834  // its first stack doubleword.
3835  InVals.push_back(FIN);
3836 
3837  // Store whatever pieces of the object are in registers to memory.
3838  for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3839  if (GPR_idx == Num_GPR_Regs)
3840  break;
3841 
3842  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3843  FuncInfo->addLiveInAttr(VReg, Flags);
3844  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3845  SDValue Addr = FIN;
3846  if (j) {
3847  SDValue Off = DAG.getConstant(j, dl, PtrVT);
3848  Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
3849  }
3850  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
3851  MachinePointerInfo(&*FuncArg, j));
3852  MemOps.push_back(Store);
3853  ++GPR_idx;
3854  }
3855  ArgOffset += ArgSize;
3856  continue;
3857  }
3858 
3859  switch (ObjectVT.getSimpleVT().SimpleTy) {
3860  default: llvm_unreachable("Unhandled argument type!");
3861  case MVT::i1:
3862  case MVT::i32:
3863  case MVT::i64:
3864  if (Flags.isNest()) {
3865  // The 'nest' parameter, if any, is passed in R11.
3866  unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
3867  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3868 
3869  if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3870  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3871 
3872  break;
3873  }
3874 
3875  // These can be scalar arguments or elements of an integer array type
3876  // passed directly. Clang may use those instead of "byval" aggregate
3877  // types to avoid forcing arguments to memory unnecessarily.
3878  if (GPR_idx != Num_GPR_Regs) {
3879  unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3880  FuncInfo->addLiveInAttr(VReg, Flags);
3881  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3882 
3883  if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3884  // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3885  // value to MVT::i64 and then truncate to the correct register size.
3886  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3887  } else {
3888  if (CallConv == CallingConv::Fast)
3889  ComputeArgOffset();
3890 
3891  needsLoad = true;
3892  ArgSize = PtrByteSize;
3893  }
3894  if (CallConv != CallingConv::Fast || needsLoad)
3895  ArgOffset += 8;
3896  break;
3897 
3898  case MVT::f32:
3899  case MVT::f64:
3900  // These can be scalar arguments or elements of a float array type
3901  // passed directly. The latter are used to implement ELFv2 homogenous
3902  // float aggregates.
3903  if (FPR_idx != Num_FPR_Regs) {
3904  unsigned VReg;
3905 
3906  if (ObjectVT == MVT::f32)
3907  VReg = MF.addLiveIn(FPR[FPR_idx],
3908  Subtarget.hasP8Vector()
3909  ? &PPC::VSSRCRegClass
3910  : &PPC::F4RCRegClass);
3911  else
3912  VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
3913  ? &PPC::VSFRCRegClass
3914  : &PPC::F8RCRegClass);
3915 
3916  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3917  ++FPR_idx;
3918  } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
3919  // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3920  // once we support fp <-> gpr moves.
3921 
3922  // This can only ever happen in the presence of f32 array types,
3923  // since otherwise we never run out of FPRs before running out
3924  // of GPRs.
3925  unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3926  FuncInfo->addLiveInAttr(VReg, Flags);
3927  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3928 
3929  if (ObjectVT == MVT::f32) {
3930  if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
3931  ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
3932  DAG.getConstant(32, dl, MVT::i32));
3933  ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
3934  }
3935 
3936  ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
3937  } else {
3938  if (CallConv == CallingConv::Fast)
3939  ComputeArgOffset();
3940 
3941  needsLoad = true;
3942  }
3943 
3944  // When passing an array of floats, the array occupies consecutive
3945  // space in the argument area; only round up to the next doubleword
3946  // at the end of the array. Otherwise, each float takes 8 bytes.
3947  if (CallConv != CallingConv::Fast || needsLoad) {
3948  ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
3949  ArgOffset += ArgSize;
3950  if (Flags.isInConsecutiveRegsLast())
3951  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3952  }
3953  break;
3954  case MVT::v4f32:
3955  case MVT::v4i32:
3956  case MVT::v8i16:
3957  case MVT::v16i8:
3958  case MVT::v2f64:
3959  case MVT::v2i64:
3960  case MVT::v1i128:
3961  case MVT::f128:
3962  if (!Subtarget.hasQPX()) {
3963  // These can be scalar arguments or elements of a vector array type
3964  // passed directly. The latter are used to implement ELFv2 homogenous
3965  // vector aggregates.
3966  if (VR_idx != Num_VR_Regs) {
3967  unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3968  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3969  ++VR_idx;
3970  } else {
3971  if (CallConv == CallingConv::Fast)
3972  ComputeArgOffset();
3973  needsLoad = true;
3974  }
3975  if (CallConv != CallingConv::Fast || needsLoad)
3976  ArgOffset += 16;
3977  break;
3978  } // not QPX
3979 
3980  assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
3981  "Invalid QPX parameter type");
3982  LLVM_FALLTHROUGH;
3983 
3984  case MVT::v4f64:
3985  case MVT::v4i1:
3986  // QPX vectors are treated like their scalar floating-point subregisters
3987  // (except that they're larger).
3988  unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
3989  if (QFPR_idx != Num_QFPR_Regs) {
3990  const TargetRegisterClass *RC;
3991  switch (ObjectVT.getSimpleVT().SimpleTy) {
3992  case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
3993  case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
3994  default: RC = &PPC::QBRCRegClass; break;
3995  }
3996 
3997  unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
3998  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3999  ++QFPR_idx;
4000  } else {
4001  if (CallConv == CallingConv::Fast)
4002  ComputeArgOffset();
4003  needsLoad = true;
4004  }
4005  if (CallConv != CallingConv::Fast || needsLoad)
4006  ArgOffset += Sz;
4007  break;
4008  }
4009 
4010  // We need to load the argument to a virtual register if we determined
4011  // above that we ran out of physical registers of the appropriate type.
4012  if (needsLoad) {
4013  if (ObjSize < ArgSize && !isLittleEndian)
4014  CurArgOffset += ArgSize - ObjSize;
4015  int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4016  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4017  ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4018  }
4019 
4020  InVals.push_back(ArgVal);
4021  }
4022 
4023  // Area that is at least reserved in the caller of this function.
4024  unsigned MinReservedArea;
4025  if (HasParameterArea)
4026  MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4027  else
4028  MinReservedArea = LinkageSize;
4029 
4030  // Set the size that is at least reserved in caller of this function. Tail
4031  // call optimized functions' reserved stack space needs to be aligned so that
4032  // taking the difference between two stack areas will result in an aligned
4033  // stack.
4034  MinReservedArea =
4035  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4036  FuncInfo->setMinReservedArea(MinReservedArea);
4037 
4038  // If the function takes variable number of arguments, make a frame index for
4039  // the start of the first vararg value... for expansion of llvm.va_start.
4040  if (isVarArg) {
4041  int Depth = ArgOffset;
4042 
4043  FuncInfo->setVarArgsFrameIndex(
4044  MFI.CreateFixedObject(PtrByteSize, Depth, true));
4045  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4046 
4047  // If this function is vararg, store any remaining integer argument regs
4048  // to their spots on the stack so that they may be loaded by dereferencing
4049  // the result of va_next.
4050  for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4051  GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4052  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4053  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4054  SDValue Store =
4055  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4056  MemOps.push_back(Store);
4057  // Increment the address by PtrByteSize (eight bytes) for the next argument to store
4058  SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4059  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4060  }
4061  }
4062 
4063  if (!MemOps.empty())
4064  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4065 
4066  return Chain;
4067 }
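// Illustrative note (a sketch, names invented for exposition): the FPR/VR
// paths above are what implement ELFv2 homogeneous aggregates; e.g. for
//   struct Quad { float x, y, z, w; };
//   double g(struct Quad q);
// a front end may pass q as four consecutive f32 pieces marked
// InConsecutiveRegs, so they land in f1-f4 here instead of being forced into
// a byval stack copy.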
4068 
4069 SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
4070  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4071  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4072  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4073  // TODO: add description of PPC stack frame format, or at least some docs.
4074  //
4075  MachineFunction &MF = DAG.getMachineFunction();
4076  MachineFrameInfo &MFI = MF.getFrameInfo();
4077  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4078 
4079  EVT PtrVT = getPointerTy(MF.getDataLayout());
4080  bool isPPC64 = PtrVT == MVT::i64;
4081  // Potential tail calls could cause overwriting of argument stack slots.
4082  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4083  (CallConv == CallingConv::Fast));
4084  unsigned PtrByteSize = isPPC64 ? 8 : 4;
4085  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4086  unsigned ArgOffset = LinkageSize;
4087  // Area that is at least reserved in caller of this function.
4088  unsigned MinReservedArea = ArgOffset;
4089 
4090  static const MCPhysReg GPR_32[] = { // 32-bit registers.
4091  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4092  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4093  };
4094  static const MCPhysReg GPR_64[] = { // 64-bit registers.
4095  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4096  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4097  };
4098  static const MCPhysReg VR[] = {
4099  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4100  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4101  };
4102 
4103  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
4104  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4105  const unsigned Num_VR_Regs = array_lengthof( VR);
4106 
4107  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4108 
4109  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4110 
4111  // In 32-bit non-varargs functions, the stack space for vectors is after the
4112  // stack space for non-vectors. We do not use this space unless we have
4113  // too many vectors to fit in registers, something that only occurs in
4114  // constructed examples:), but we have to walk the arglist to figure
4115  // that out...for the pathological case, compute VecArgOffset as the
4116  // start of the vector parameter area. Computing VecArgOffset is the
4117  // entire point of the following loop.
4118  unsigned VecArgOffset = ArgOffset;
4119  if (!isVarArg && !isPPC64) {
4120  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
4121  ++ArgNo) {
4122  EVT ObjectVT = Ins[ArgNo].VT;
4123  ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4124 
4125  if (Flags.isByVal()) {
4126  // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of regs.
4127  unsigned ObjSize = Flags.getByValSize();
4128  unsigned ArgSize =
4129  ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4130  VecArgOffset += ArgSize;
4131  continue;
4132  }
4133 
4134  switch(ObjectVT.getSimpleVT().SimpleTy) {
4135  default: llvm_unreachable("Unhandled argument type!");
4136  case MVT::i1:
4137  case MVT::i32:
4138  case MVT::f32:
4139  VecArgOffset += 4;
4140  break;
4141  case MVT::i64: // PPC64
4142  case MVT::f64:
4143  // FIXME: We are guaranteed to be !isPPC64 at this point.
4144  // Does MVT::i64 apply?
4145  VecArgOffset += 8;
4146  break;
4147  case MVT::v4f32:
4148  case MVT::v4i32:
4149  case MVT::v8i16:
4150  case MVT::v16i8:
4151  // Nothing to do, we're only looking at Nonvector args here.
4152  break;
4153  }
4154  }
4155  }
4156  // We've found where the vector parameter area in memory is. Skip the
4157  // first 12 parameters; these don't use that memory.
4158  VecArgOffset = ((VecArgOffset+15)/16)*16;
4159  VecArgOffset += 12*16;
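// Worked example for the computation above (sketch): with no non-vector
// arguments, VecArgOffset starts at the linkage size, is rounded up to a
// 16-byte boundary, and then skips 12 * 16 = 192 bytes, the shadow space
// reserved for the first twelve vector parameters, which are passed in
// v2-v13.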
4160 
4161  // Add DAG nodes to load the arguments or copy them out of registers. On
4162  // entry to a function on PPC, the arguments start after the linkage area,
4163  // although the first ones are often in registers.
4164 
4165  SmallVector<SDValue, 8> MemOps;
4166  unsigned nAltivecParamsAtEnd = 0;
4167  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4168  unsigned CurArgIdx = 0;
4169  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4170  SDValue ArgVal;
4171  bool needsLoad = false;
4172  EVT ObjectVT = Ins[ArgNo].VT;
4173  unsigned ObjSize = ObjectVT.getSizeInBits()/8;
4174  unsigned ArgSize = ObjSize;
4175  ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4176  if (Ins[ArgNo].isOrigArg()) {
4177  std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4178  CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4179  }
4180  unsigned CurArgOffset = ArgOffset;
4181 
4182  // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
4183  if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
4184  ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
4185  if (isVarArg || isPPC64) {
4186  MinReservedArea = ((MinReservedArea+15)/16)*16;
4187  MinReservedArea += CalculateStackSlotSize(ObjectVT,
4188  Flags,
4189  PtrByteSize);
4190  } else nAltivecParamsAtEnd++;
4191  } else
4192  // Calculate min reserved area.
4193  MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
4194  Flags,
4195  PtrByteSize);
4196 
4197  // FIXME the codegen can be much improved in some cases.
4198  // We do not have to keep everything in memory.
4199  if (Flags.isByVal()) {
4200  assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4201 
4202  // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
4203  ObjSize = Flags.getByValSize();
4204  ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4205  // Objects of size 1 and 2 are right justified, everything else is
4206  // left justified. This means the memory address is adjusted forwards.
4207  if (ObjSize==1 || ObjSize==2) {
4208  CurArgOffset = CurArgOffset + (4 - ObjSize);
4209  }
4210  // The value of the object is its address.
4211  int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4212  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4213  InVals.push_back(FIN);
4214  if (ObjSize==1 || ObjSize==2) {
4215  if (GPR_idx != Num_GPR_Regs) {
4216  unsigned VReg;
4217  if (isPPC64)
4218  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4219  else
4220  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4221  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4222  EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4223  SDValue Store =
4224  DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4225  MachinePointerInfo(&*FuncArg), ObjType);
4226  MemOps.push_back(Store);
4227  ++GPR_idx;
4228  }
4229 
4230  ArgOffset += PtrByteSize;
4231 
4232  continue;
4233  }
4234  for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4235  // Store whatever pieces of the object are in registers
4236  // to memory. ArgOffset will be the address of the beginning
4237  // of the object.
4238  if (GPR_idx != Num_GPR_Regs) {
4239  unsigned VReg;
4240  if (isPPC64)
4241  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4242  else
4243  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4244  int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4245  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4246  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4247  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4248  MachinePointerInfo(&*FuncArg, j));
4249  MemOps.push_back(Store);
4250  ++GPR_idx;
4251  ArgOffset += PtrByteSize;
4252  } else {
4253  ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4254  break;
4255  }
4256  }
4257  continue;
4258  }
4259 
4260  switch (ObjectVT.getSimpleVT().SimpleTy) {
4261  default: llvm_unreachable("Unhandled argument type!");
4262  case MVT::i1:
4263  case MVT::i32:
4264  if (!isPPC64) {
4265  if (GPR_idx != Num_GPR_Regs) {
4266  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4267  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4268 
4269  if (ObjectVT == MVT::i1)
4270  ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4271 
4272  ++GPR_idx;
4273  } else {
4274  needsLoad = true;
4275  ArgSize = PtrByteSize;
4276  }
4277  // All int arguments reserve stack space in the Darwin ABI.
4278  ArgOffset += PtrByteSize;
4279  break;
4280  }
4281  LLVM_FALLTHROUGH;
4282  case MVT::i64: // PPC64
4283  if (GPR_idx != Num_GPR_Regs) {
4284  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4285  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4286 
4287  if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4288  // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4289  // value to MVT::i64 and then truncate to the correct register size.
4290  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4291 
4292  ++GPR_idx;
4293  } else {
4294  needsLoad = true;
4295  ArgSize = PtrByteSize;
4296  }
4297  // All int arguments reserve stack space in the Darwin ABI.
4298  ArgOffset += 8;
4299  break;
4300 
4301  case MVT::f32:
4302  case MVT::f64:
4303  // Every 4 bytes of argument space consumes one of the GPRs available for
4304  // argument passing.
4305  if (GPR_idx != Num_GPR_Regs) {
4306  ++GPR_idx;
4307  if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4308  ++GPR_idx;
4309  }
4310  if (FPR_idx != Num_FPR_Regs) {
4311  unsigned VReg;
4312 
4313  if (ObjectVT == MVT::f32)
4314  VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4315  else
4316  VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4317 
4318  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4319  ++FPR_idx;
4320  } else {
4321  needsLoad = true;
4322  }
4323 
4324  // All FP arguments reserve stack space in the Darwin ABI.
4325  ArgOffset += isPPC64 ? 8 : ObjSize;
4326  break;
4327  case MVT::v4f32:
4328  case MVT::v4i32:
4329  case MVT::v8i16:
4330  case MVT::v16i8:
4331  // Note that vector arguments in registers don't reserve stack space,
4332  // except in varargs functions.
4333  if (VR_idx != Num_VR_Regs) {
4334  unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4335  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4336  if (isVarArg) {
4337  while ((ArgOffset % 16) != 0) {
4338  ArgOffset += PtrByteSize;
4339  if (GPR_idx != Num_GPR_Regs)
4340  GPR_idx++;
4341  }
4342  ArgOffset += 16;
4343  GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4344  }
4345  ++VR_idx;
4346  } else {
4347  if (!isVarArg && !isPPC64) {
4348  // Vectors go after all the nonvectors.
4349  CurArgOffset = VecArgOffset;
4350  VecArgOffset += 16;
4351  } else {
4352  // Vectors are aligned.
4353  ArgOffset = ((ArgOffset+15)/16)*16;
4354  CurArgOffset = ArgOffset;
4355  ArgOffset += 16;
4356  }
4357  needsLoad = true;
4358  }
4359  break;
4360  }
4361 
4362  // We need to load the argument to a virtual register if we determined above
4363  // that we ran out of physical registers of the appropriate type.
4364  if (needsLoad) {
4365  int FI = MFI.CreateFixedObject(ObjSize,
4366  CurArgOffset + (ArgSize - ObjSize),
4367  isImmutable);
4368  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4369  ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4370  }
4371 
4372  InVals.push_back(ArgVal);
4373  }
4374 
4375  // Allow for Altivec parameters at the end, if needed.
4376  if (nAltivecParamsAtEnd) {
4377  MinReservedArea = ((MinReservedArea+15)/16)*16;
4378  MinReservedArea += 16*nAltivecParamsAtEnd;
4379  }
4380 
4381  // Area that is at least reserved in the caller of this function.
4382  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4383 
4384  // Set the size that is at least reserved in caller of this function. Tail
4385  // call optimized functions' reserved stack space needs to be aligned so that
4386  // taking the difference between two stack areas will result in an aligned
4387  // stack.
4388  MinReservedArea =
4389  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4390  FuncInfo->setMinReservedArea(MinReservedArea);
4391 
4392  // If the function takes variable number of arguments, make a frame index for
4393  // the start of the first vararg value... for expansion of llvm.va_start.
4394  if (isVarArg) {
4395  int Depth = ArgOffset;
4396 
4397  FuncInfo->setVarArgsFrameIndex(
4398  MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4399  Depth, true));
4400  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4401 
4402  // If this function is vararg, store any remaining integer argument regs
4403  // to their spots on the stack so that they may be loaded by dereferencing
4404  // the result of va_next.
4405  for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4406  unsigned VReg;
4407 
4408  if (isPPC64)
4409  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4410  else
4411  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4412 
4413  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4414  SDValue Store =
4415  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4416  MemOps.push_back(Store);
4417  // Increment the address by the pointer size for the next argument to store
4418  SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4419  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4420  }
4421  }
4422 
4423  if (!MemOps.empty())
4424  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4425 
4426  return Chain;
4427 }
4428 
4429 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4430 /// adjusted to accommodate the arguments for the tailcall.
4431 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4432  unsigned ParamSize) {
4433 
4434  if (!isTailCall) return 0;
4435 
4436  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4437  unsigned CallerMinReservedArea = FI->getMinReservedArea();
4438  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4439  // Remember only if the new adjustment is bigger.
4440  if (SPDiff < FI->getTailCallSPDelta())
4441  FI->setTailCallSPDelta(SPDiff);
4442 
4443  return SPDiff;
4444 }
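// Worked example for the helper above (sketch): if the caller reserved a
// 64-byte parameter area but this tail call needs 112 bytes of arguments,
// SPDiff is 64 - 112 = -48 and the stack must be grown by 48 bytes before
// the jump; only the most negative delta seen so far is kept in
// TailCallSPDelta.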
4445 
4446 static bool isFunctionGlobalAddress(SDValue Callee);
4447 
4448 static bool
4449 callsShareTOCBase(const Function *Caller, SDValue Callee,
4450  const TargetMachine &TM) {
4451  // If !G, Callee can be an external symbol.
4452  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4453  if (!G)
4454  return false;
4455 
4456  // The medium and large code models are expected to provide a sufficiently
4457  // large TOC to provide all data addressing needs of a module with a
4458  // single TOC. Since each module will be addressed with a single TOC then we
4459  // only need to check that caller and callee don't cross dso boundaries.
4460  if (CodeModel::Medium == TM.getCodeModel() ||
4461  CodeModel::Large == TM.getCodeModel())
4462  return TM.shouldAssumeDSOLocal(*Caller->getParent(), G->getGlobal());
4463 
4464  // Otherwise we need to ensure callee and caller are in the same section,
4465  // since the linker may allocate multiple TOCs, and we don't know which
4466  // sections will belong to the same TOC base.
4467 
4468  const GlobalValue *GV = G->getGlobal();
4469  if (!GV->isStrongDefinitionForLinker())
4470  return false;
4471 
4472  // Any explicitly-specified sections and section prefixes must also match.
4473  // Also, if we're using -ffunction-sections, then each function is always in
4474  // a different section (the same is true for COMDAT functions).
4475  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4476  GV->getSection() != Caller->getSection())
4477  return false;
4478  if (const auto *F = dyn_cast<Function>(GV)) {
4479  if (F->getSectionPrefix() != Caller->getSectionPrefix())
4480  return false;
4481  }
4482 
4483  // If the callee might be interposed, then we can't assume the ultimate call
4484  // target will be in the same section. Even in cases where we can assume that
4485  // interposition won't happen, in any case where the linker might insert a
4486  // stub to allow for interposition, we must generate code as though
4487  // interposition might occur. To understand why this matters, consider a
4488  // situation where: a -> b -> c where the arrows indicate calls. b and c are
4489  // in the same section, but a is in a different module (i.e. has a different
4490  // TOC base pointer). If the linker allows for interposition between b and c,
4491  // then it will generate a stub for the call edge between b and c which will
4492  // save the TOC pointer into the designated stack slot allocated by b. If we
4493  // return true here, and therefore allow a tail call between b and c, that
4494  // stack slot won't exist and the b -> c stub will end up saving b's TOC base
4495  // pointer into the stack slot allocated by a (where the a -> b stub saved
4496  // a's TOC base pointer). If we're not considering a tail call, but rather,
4497  // whether a nop is needed after the call instruction in b, because the linker
4498  // will insert a stub, it might complain about a missing nop if we omit it
4499  // (although many don't complain in this case).
4500  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4501  return false;
4502 
4503  return true;
4504 }
4505 
4506 static bool
4507 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4508  const SmallVectorImpl<ISD::OutputArg> &Outs) {
4509  assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());
4510 
4511  const unsigned PtrByteSize = 8;
4512  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4513 
4514  static const MCPhysReg GPR[] = {
4515  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4516  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4517  };
4518  static const MCPhysReg VR[] = {
4519  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4520  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4521  };
4522 
4523  const unsigned NumGPRs = array_lengthof(GPR);
4524  const unsigned NumFPRs = 13;
4525  const unsigned NumVRs = array_lengthof(VR);
4526  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4527 
4528  unsigned NumBytes = LinkageSize;
4529  unsigned AvailableFPRs = NumFPRs;
4530  unsigned AvailableVRs = NumVRs;
4531 
4532  for (const ISD::OutputArg& Param : Outs) {
4533  if (Param.Flags.isNest()) continue;
4534 
4535  if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
4536  PtrByteSize, LinkageSize, ParamAreaSize,
4537  NumBytes, AvailableFPRs, AvailableVRs,
4538  Subtarget.hasQPX()))
4539  return true;
4540  }
4541  return false;
4542 }
4543 
4544 static bool
4545 hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
4546  if (CS.arg_size() != CallerFn->arg_size())
4547  return false;
4548 
4549  ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin();
4550  ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end();
4551  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4552 
4553  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4554  const Value* CalleeArg = *CalleeArgIter;
4555  const Value* CallerArg = &(*CallerArgIter);
4556  if (CalleeArg == CallerArg)
4557  continue;
4558 
4559  // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4560  // tail call @callee([4 x i64] undef, [4 x i64] %b)
4561  // }
4562  // The first argument of the callee is undef and has the same type as the caller's.
4563  if (CalleeArg->getType() == CallerArg->getType() &&
4564  isa<UndefValue>(CalleeArg))
4565  continue;
4566 
4567  return false;
4568  }
4569 
4570  return true;
4571 }
4572 
4573 // Returns true if TCO is possible between the callers and callees
4574 // calling conventions.
4575 static bool
4576 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4577  CallingConv::ID CalleeCC) {
4578  // Tail calls are possible with fastcc and ccc.
4579  auto isTailCallableCC = [] (CallingConv::ID CC){
4580  return CC == CallingConv::C || CC == CallingConv::Fast;
4581  };
4582  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4583  return false;
4584 
4585  // We can safely tail call both fastcc and ccc callees from a c calling
4586  // convention caller. If the caller is fastcc, we may have less stack space
4587  // than a non-fastcc caller with the same signature so disable tail-calls in
4588  // that case.
4589  return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4590 }
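// Quick summary of the predicate above (sketch): a C caller may tail-call a
// C or fastcc callee, a fastcc caller may tail-call another fastcc callee,
// but a fastcc caller may not tail-call a C callee, since its own frame may
// be smaller than that of an equivalent C-convention caller.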
4591 
4592 bool
4593 PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4594  SDValue Callee,
4595  CallingConv::ID CalleeCC,
4596  ImmutableCallSite CS,
4597  bool isVarArg,
4598  const SmallVectorImpl<ISD::OutputArg> &Outs,
4599  const SmallVectorImpl<ISD::InputArg> &Ins,
4600  SelectionDAG& DAG) const {
4601  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4602 
4603  if (DisableSCO && !TailCallOpt) return false;
4604 
4605  // Variadic argument functions are not supported.
4606  if (isVarArg) return false;
4607 
4608  auto &Caller = DAG.getMachineFunction().getFunction();
4609  // Check that the calling conventions are compatible for tco.
4610  if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4611  return false;
4612 
4613  // A caller containing any byval parameter is not supported.
4614  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4615  return false;
4616 
4617  // A callee containing any byval parameter is not supported either.
4618  // Note: This is a quick work around, because in some cases, e.g.
4619  // caller's stack size > callee's stack size, we are still able to apply
4620  // sibling call optimization. For example, gcc is able to do SCO for caller1
4621  // in the following example, but not for caller2.
4622  // struct test {
4623  // long int a;
4624  // char ary[56];
4625  // } gTest;
4626  // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4627  // b->a = v.a;
4628  // return 0;
4629  // }
4630  // void caller1(struct test a, struct test c, struct test *b) {
4631  // callee(gTest, b); }
4632  // void caller2(struct test *b) { callee(gTest, b); }
4633  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4634  return false;
4635 
4636  // If callee and caller use different calling conventions, we cannot pass
4637  // parameters on stack since offsets for the parameter area may be different.
4638  if (Caller.getCallingConv() != CalleeCC &&
4639  needStackSlotPassParameters(Subtarget, Outs))
4640  return false;
4641 
4642  // No TCO/SCO on indirect calls because the caller has to restore its TOC
4643  if (!isFunctionGlobalAddress(Callee) &&
4644  !isa<ExternalSymbolSDNode>(Callee))
4645  return false;
4646 
4647  // If the caller and callee potentially have different TOC bases then we
4648  // cannot tail call since we need to restore the TOC pointer after the call.
4649  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4650  if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4651  return false;
4652 
4653  // TCO allows altering callee ABI, so we don't have to check further.
4654  if (CalleeCC == CallingConv::Fast && TailCallOpt)
4655  return true;
4656 
4657  if (DisableSCO) return false;
4658 
4659  // If the callee uses the same argument list as the caller, we can apply
4660  // SCO in this case. Otherwise, we need to check whether the callee needs
4661  // stack slots for passing arguments.
4662  if (!hasSameArgumentList(&Caller, CS) &&
4663  needStackSlotPassParameters(Subtarget, Outs)) {
4664  return false;
4665  }
4666 
4667  return true;
4668 }
4669 
4670 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4671 /// for tail call optimization. Targets which want to do tail call
4672 /// optimization should implement this function.
4673 bool
4674 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4675  CallingConv::ID CalleeCC,
4676  bool isVarArg,
4677  const SmallVectorImpl<ISD::InputArg> &Ins,
4678  SelectionDAG& DAG) const {
4679  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4680  return false;
4681 
4682  // Variable argument functions are not supported.
4683  if (isVarArg)
4684  return false;
4685 
4686  MachineFunction &MF = DAG.getMachineFunction();
4687  CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4688  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4689  // Functions containing by val parameters are not supported.
4690  for (unsigned i = 0; i != Ins.size(); i++) {
4691  ISD::ArgFlagsTy Flags = Ins[i].Flags;
4692  if (Flags.isByVal()) return false;
4693  }
4694 
4695  // Non-PIC/GOT tail calls are supported.
4696  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4697  return true;
4698 
4699  // At the moment we can only do local tail calls (in same module, hidden
4700  // or protected) if we are generating PIC.
4701  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4702  return G->getGlobal()->hasHiddenVisibility()
4703  || G->getGlobal()->hasProtectedVisibility();
4704  }
4705 
4706  return false;
4707 }
4708 
4709 /// isCallCompatibleAddress - Return the immediate to use if the specified
4710 /// 32-bit value is representable in the immediate field of a BxA instruction.
4711 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4712  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4713  if (!C) return nullptr;
4714 
4715  int Addr = C->getZExtValue();
4716  if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4717  SignExtend32<26>(Addr) != Addr)
4718  return nullptr; // Top 6 bits have to be sext of immediate.
4719 
4720  return DAG
4721  .getConstant(
4722  (int)C->getZExtValue() >> 2, SDLoc(Op),
4723  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4724  .getNode();
4725 }
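// Worked example for the helper above (sketch): Op == 0x1000 has its low two
// bits clear and survives SignExtend32<26> unchanged, so a constant node for
// 0x1000 >> 2 == 0x400 is returned and can be used as a direct absolute call
// target; Op == 0x1002 fails the alignment check and the helper returns
// nullptr.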
4726 
4727 namespace {
4728 
4729 struct TailCallArgumentInfo {
4730  SDValue Arg;
4731  SDValue FrameIdxOp;
4732  int FrameIdx = 0;
4733 
4734  TailCallArgumentInfo() = default;
4735 };
4736 
4737 } // end anonymous namespace
4738 
4739 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4740 static void StoreTailCallArgumentsToStackSlot(
4741  SelectionDAG &DAG, SDValue Chain,
4742  const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4743  SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4744  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4745  SDValue Arg = TailCallArgs[i].Arg;
4746  SDValue FIN = TailCallArgs[i].FrameIdxOp;
4747  int FI = TailCallArgs[i].FrameIdx;
4748  // Store relative to framepointer.
4749  MemOpChains.push_back(DAG.getStore(
4750  Chain, dl, Arg, FIN,
4751  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4752  }
4753 }
4754 
4755 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4756 /// the appropriate stack slot for the tail call optimized function call.
4757 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4758  SDValue OldRetAddr, SDValue OldFP,
4759  int SPDiff, const SDLoc &dl) {
4760  if (SPDiff) {
4761  // Calculate the new stack slot for the return address.
4762  MachineFunction &MF = DAG.getMachineFunction();
4763  const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4764  const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4765  bool isPPC64 = Subtarget.isPPC64();
4766  int SlotSize = isPPC64 ? 8 : 4;
4767  int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4768  int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4769  NewRetAddrLoc, true);
4770  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4771  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4772  Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4773  MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4774 
4775  // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4776  // slot as the FP is never overwritten.
4777  if (Subtarget.isDarwinABI()) {
4778  int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
4779  int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
4780  true);
4781  SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
4782  Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
4783  MachinePointerInfo::getFixedStack(
4784  DAG.getMachineFunction(), NewFPIdx));
4785  }
4786  }
4787  return Chain;
4788 }
4789 
4790 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4791 /// the position of the argument.
4792 static void
4793 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4794  SDValue Arg, int SPDiff, unsigned ArgOffset,
4795  SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4796  int Offset = ArgOffset + SPDiff;
4797  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4798  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4799  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4800  SDValue FIN = DAG.getFrameIndex(FI, VT);
4801  TailCallArgumentInfo Info;
4802  Info.Arg = Arg;
4803  Info.FrameIdxOp = FIN;
4804  Info.FrameIdx = FI;
4805  TailCallArguments.push_back(Info);
4806 }
4807 
4808 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
4809 /// stack slot. Returns the chain as result and the loaded frame pointers in
4810 /// LROpOut/FPOpout. Used when tail calling.
4811 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4812  SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4813  SDValue &FPOpOut, const SDLoc &dl) const {
4814  if (SPDiff) {
4815  // Load the LR and FP stack slot for later adjusting.
4816  EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4817  LROpOut = getReturnAddrFrameIndex(DAG);
4818  LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4819  Chain = SDValue(LROpOut.getNode(), 1);
4820 
4821  // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4822  // slot as the FP is never overwritten.
4823  if (Subtarget.isDarwinABI()) {
4824  FPOpOut = getFramePointerFrameIndex(DAG);
4825  FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
4826  Chain = SDValue(FPOpOut.getNode(), 1);
4827  }
4828  }
4829  return Chain;
4830 }
4831 
4832 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4833 /// by "Src" to address "Dst" of size "Size". Alignment information is
4834 /// specified by the specific parameter attribute. The copy will be passed as
4835 /// a byval function parameter.
4836 /// Sometimes what we are copying is the end of a larger object, the part that
4837 /// does not fit in registers.
4838 static SDValue CreateCopyOfByValArgument(SDValue Dst, SDValue Src,
4839  SDValue Chain, ISD::ArgFlagsTy Flags,
4840  SelectionDAG &DAG, const SDLoc &dl) {
4841  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4842  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
4843  false, false, false, MachinePointerInfo(),
4844  MachinePointerInfo());
4845 }
4846 
4847 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4848 /// tail calls.
4849 static void LowerMemOpCallTo(
4850  SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4851  SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4852  bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4853  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4854  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4855  if (!isTailCall) {
4856  if (isVector) {
4857  SDValue StackPtr;
4858  if (isPPC64)
4859  StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4860  else
4861  StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4862  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4863  DAG.getConstant(ArgOffset, dl, PtrVT));
4864  }
4865  MemOpChains.push_back(
4866  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4867  // Calculate and remember argument location.
4868  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4869  TailCallArguments);
4870 }
4871 
4872 static void
4873 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4874  const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4875  SDValue FPOp,
4876  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4877  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4878  // might overwrite each other in case of tail call optimization.
4879  SmallVector<SDValue, 8> MemOpChains2;
4880  // Do not flag preceding copytoreg stuff together with the following stuff.
4881  InFlag = SDValue();
4882  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4883  MemOpChains2, dl);
4884  if (!MemOpChains2.empty())
4885  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4886 
4887  // Store the return address to the appropriate stack slot.
4888  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4889 
4890  // Emit callseq_end just before tailcall node.
4891  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4892  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4893  InFlag = Chain.getValue(1);
4894 }
4895 
4896 // Is this global address that of a function that can be called by name? (as
4897 // opposed to something that must hold a descriptor for an indirect call).
4898 static bool isFunctionGlobalAddress(SDValue Callee) {
4899  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4900  if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4901  Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4902  return false;
4903 
4904  return G->getGlobal()->getValueType()->isFunctionTy();
4905  }
4906 
4907  return false;
4908 }
4909 
4910 static unsigned
4911 PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
4912  SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
4913  bool isPatchPoint, bool hasNest,
4914  SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
4915  SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
4916  ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
4917  bool isPPC64 = Subtarget.isPPC64();
4918  bool isSVR4ABI = Subtarget.isSVR4ABI();
4919  bool isELFv2ABI = Subtarget.isELFv2ABI();
4920 
4921  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4922  NodeTys.push_back(MVT::Other); // Returns a chain
4923  NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
4924 
4925  unsigned CallOpc = PPCISD::CALL;
4926 
4927  bool needIndirectCall = true;
4928  if (!isSVR4ABI || !isPPC64)
4929  if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
4930  // If this is an absolute destination address, use the munged value.
4931  Callee = SDValue(Dest, 0);
4932  needIndirectCall = false;
4933  }
4934 
4935  // PC-relative references to external symbols should go through $stub, unless
4936  // we're building with the leopard linker or later, which automatically
4937  // synthesizes these stubs.
4938  const TargetMachine &TM = DAG.getTarget();
4939  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
4940  const GlobalValue *GV = nullptr;
4941  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
4942  GV = G->getGlobal();
4943  bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
4944  bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
4945 
4946  if (isFunctionGlobalAddress(Callee)) {
4947  GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
4948  // A call to a TLS address is actually an indirect call to a
4949  // thread-specific pointer.
4950  unsigned OpFlags = 0;
4951  if (UsePlt)
4952  OpFlags = PPCII::MO_PLT;
4953 
4954  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4955  // every direct call is) turn it into a TargetGlobalAddress /
4956  // TargetExternalSymbol node so that legalize doesn't hack it.
4957  Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
4958  Callee.getValueType(), 0, OpFlags);
4959  needIndirectCall = false;
4960  }
4961 
4962  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4963  unsigned char OpFlags = 0;
4964 
4965  if (UsePlt)
4966  OpFlags = PPCII::MO_PLT;
4967 
4968  Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
4969  OpFlags);
4970  needIndirectCall = false;
4971  }
4972 
4973  if (isPatchPoint) {
4974  // We'll form an invalid direct call when lowering a patchpoint; the full
4975  // sequence for an indirect call is complicated, and many of the
4976  // instructions introduced might have side effects (and, thus, can't be
4977  // removed later). The call itself will be removed as soon as the
4978  // argument/return lowering is complete, so the fact that it has the wrong
4979  // kind of operands should not really matter.
4980  needIndirectCall = false;
4981  }
4982 
4983  if (needIndirectCall) {
4984  // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
4985  // to do the call, we can't use PPCISD::CALL.
4986  SDValue MTCTROps[] = {Chain, Callee, InFlag};
4987 
4988  if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
4989  // Function pointers in the 64-bit SVR4 ABI do not point to the function
4990  // entry point, but to the function descriptor (the function entry point
4991  // address is part of the function descriptor though).
4992  // The function descriptor is a three doubleword structure with the
4993  // following fields: function entry point, TOC base address and
4994  // environment pointer.
4995  // Thus for a call through a function pointer, the following actions need
4996  // to be performed:
4997  // 1. Save the TOC of the caller in the TOC save area of its stack
4998  // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
4999  // 2. Load the address of the function entry point from the function
5000  // descriptor.
5001  // 3. Load the TOC of the callee from the function descriptor into r2.
5002  // 4. Load the environment pointer from the function descriptor into
5003  // r11.
5004  // 5. Branch to the function entry point address.
5005  // 6. On return of the callee, the TOC of the caller needs to be
5006  // restored (this is done in FinishCall()).
5007  //
5008  // The loads are scheduled at the beginning of the call sequence, and the
5009  // register copies are flagged together to ensure that no other
5010  // operations can be scheduled in between. E.g. without flagging the
5011  // copies together, a TOC access in the caller could be scheduled between
5012  // the assignment of the callee TOC and the branch to the callee, which
5013  // results in the TOC access going through the TOC of the callee instead
5014  // of going through the TOC of the caller, which leads to incorrect code.
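// For reference, a sketch of the descriptor layout implied by the offsets
// used below:
//   descriptor + 0  : function entry point (loaded and moved into CTR)
//   descriptor + 8  : callee TOC base       (copied into r2)
//   descriptor + 16 : environment pointer   (copied into r11 unless 'nest')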
5015 
5016  // Load the address of the function entry point from the function
5017  // descriptor.
5018  SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
5019  if (LDChain.getValueType() == MVT::Glue)
5020  LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
5021 
5022  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5023  ? (MachineMemOperand::MODereferenceable |
5024  MachineMemOperand::MOInvariant)
5025  : MachineMemOperand::MONone;
5026 
5027  MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
5028  SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
5029  /* Alignment = */ 8, MMOFlags);
5030 
5031  // Load environment pointer into r11.
5032  SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
5033  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
5034  SDValue LoadEnvPtr =
5035  DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
5036  /* Alignment = */ 8, MMOFlags);
5037 
5038  SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
5039  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
5040  SDValue TOCPtr =
5041  DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
5042  /* Alignment = */ 8, MMOFlags);
5043 
5044  setUsesTOCBasePtr(DAG);
5045  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
5046  InFlag);
5047  Chain = TOCVal.getValue(0);
5048  InFlag = TOCVal.getValue(1);
5049 
5050  // If the function call has an explicit 'nest' parameter, it takes the
5051  // place of the environment pointer.
5052  if (!hasNest) {
5053  SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
5054  InFlag);
5055 
5056  Chain = EnvVal.getValue(0);
5057  InFlag = EnvVal.getValue(1);
5058  }
5059 
5060  MTCTROps[0] = Chain;
5061  MTCTROps[1] = LoadFuncPtr;
5062  MTCTROps[2] = InFlag;
5063  }
5064 
5065  Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
5066  makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
5067  InFlag = Chain.getValue(1);
5068 
5069  NodeTys.clear();
5070  NodeTys.push_back(MVT::Other);
5071  NodeTys.push_back(MVT::Glue);
5072  Ops.push_back(Chain);
5073  CallOpc = PPCISD::BCTRL;
5074  Callee.setNode(nullptr);
5075  // Add use of X11 (holding environment pointer)
5076  if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
5077  Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
5078  // Add CTR register as callee so a bctr can be emitted later.
5079  if (isTailCall)
5080  Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
5081  }
5082 
5083  // If this is a direct call, pass the chain and the callee.
5084  if (Callee.getNode()) {
5085  Ops.push_back(Chain);
5086  Ops.push_back(Callee);
5087  }
5088  // If this is a tail call add stack pointer delta.
5089  if (isTailCall)
5090  Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5091 
5092  // Add argument registers to the end of the list so that they are known live
5093  // into the call.
5094  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5095  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5096  RegsToPass[i].second.getValueType()));
5097 
5098  // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
5099  // into the call.
5100  // We do need to reserve X2 to appease the verifier for the PATCHPOINT.
5101  if (isSVR4ABI && isPPC64) {
5102  setUsesTOCBasePtr(DAG);
5103 
5104  // We cannot add X2 as an operand here for PATCHPOINT, because there is no
5105  // way to mark dependencies as implicit here. We will add the X2 dependency
5106  // in EmitInstrWithCustomInserter.
5107  if (!isPatchPoint)
5108  Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
5109  }
5110 
5111  return CallOpc;
5112 }
5113 
5114 SDValue PPCTargetLowering::LowerCallResult(
5115  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5116  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5117  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5118  SmallVector<CCValAssign, 16> RVLocs;
5119  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5120  *DAG.getContext());
5121 
5122  CCRetInfo.AnalyzeCallResult(
5123  Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5124  ? RetCC_PPC_Cold
5125  : RetCC_PPC);
5126 
5127  // Copy all of the result registers out of their specified physreg.
5128  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5129  CCValAssign &VA = RVLocs[i];
5130  assert(VA.isRegLoc() && "Can only return in registers!");
5131 
5132  SDValue Val = DAG.getCopyFromReg(Chain, dl,
5133  VA.getLocReg(), VA.getLocVT(), InFlag);
5134  Chain = Val.getValue(1);
5135  InFlag = Val.getValue(2);
5136 
5137  switch (VA.getLocInfo()) {
5138  default: llvm_unreachable("Unknown loc info!");
5139  case CCValAssign::Full: break;
5140  case CCValAssign::AExt:
5141  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5142  break;
5143  case CCValAssign::ZExt:
5144  Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5145  DAG.getValueType(VA.getValVT()));
5146  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5147  break;
5148  case CCValAssign::SExt:
5149  Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5150  DAG.getValueType(VA.getValVT()));
5151  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5152  break;
5153  }
5154 
5155  InVals.push_back(Val);
5156  }
5157 
5158  return Chain;
5159 }
5160 
5161 SDValue PPCTargetLowering::FinishCall(
5162  CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
5163  bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
5164  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
5165  SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5166  unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5167  SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
5168  std::vector<EVT> NodeTys;
5169  SmallVector<SDValue, 8> Ops;
5170  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
5171  SPDiff, isTailCall, isPatchPoint, hasNest,
5172  RegsToPass, Ops, NodeTys, CS, Subtarget);
5173 
5174  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5175  if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
5176  Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5177 
5178  // When performing tail call optimization the callee pops its arguments off
5179  // the stack. Account for this here so these bytes can be pushed back on in
5180  // PPCFrameLowering::eliminateCallFramePseudoInstr.
5181  int BytesCalleePops =
5182  (CallConv == CallingConv::Fast &&
5183  getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
5184 
5185  // Add a register mask operand representing the call-preserved registers.
5186  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5187  const uint32_t *Mask =
5188  TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
5189  assert(Mask && "Missing call preserved mask for calling convention");
5190  Ops.push_back(DAG.getRegisterMask(Mask));
5191 
5192  if (InFlag.getNode())
5193  Ops.push_back(InFlag);
5194 
5195  // Emit tail call.
5196  if (isTailCall) {
5197  assert(((Callee.getOpcode() == ISD::Register &&
5198  cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5199  Callee.getOpcode() == ISD::TargetExternalSymbol ||
5200  Callee.getOpcode() == ISD::TargetGlobalAddress ||
5201  isa<ConstantSDNode>(Callee)) &&
5202  "Expecting an global address, external symbol, absolute value or register");
5203 
5204  DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5205  return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
5206  }
5207 
5208  // Add a NOP immediately after the branch instruction when using the 64-bit
5209  // SVR4 ABI. At link time, if caller and callee are in different modules and
5210  // thus have different TOCs, the call will be replaced with a call to a stub
5211  // function which saves the current TOC, loads the TOC of the callee and
5212  // branches to the callee. The NOP will be replaced with a load instruction
5213  // which restores the TOC of the caller from the TOC save slot of the current
5214  // stack frame. If caller and callee belong to the same module (and have the
5215  // same TOC), the NOP will remain unchanged.
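 // As a hedged illustration of the linker's rewrite (offsets assume the
 // usual TOC save slot at 40 bytes on ELFv1 and 24 bytes on ELFv2):
 //   bl callee          -->  bl <long-branch/TOC stub for callee>
 //   nop                -->  ld r2, <TOC save offset>(r1)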
5216 
5217  MachineFunction &MF = DAG.getMachineFunction();
5218  if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
5219  !isPatchPoint) {
5220  if (CallOpc == PPCISD::BCTRL) {
5221  // This is a call through a function pointer.
5222  // Restore the caller TOC from the save area into R2.
5223  // See PrepareCall() for more information about calls through function
5224  // pointers in the 64-bit SVR4 ABI.
5225  // We are using a target-specific load with r2 hard coded, because the
5226  // result of a target-independent load would never go directly into r2,
5227  // since r2 is a reserved register (which prevents the register allocator
5228  // from allocating it), resulting in an additional register being
5229  // allocated and an unnecessary move instruction being generated.
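 // Sketch of what BCTRL_LOAD_TOC ultimately expands to (the offset is
 // whatever getTOCSaveOffset() returns, e.g. 24 on ELFv2; shown only as an
 // illustration):
 //   bctrl
 //   ld r2, <TOC save offset>(r1)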
5230  CallOpc = PPCISD::BCTRL_LOAD_TOC;
5231 
5232  EVT PtrVT = getPointerTy(DAG.getDataLayout());
5233  SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
5234  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5235  SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5236  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
5237 
5238  // The address needs to go after the chain input but before the flag (or
5239  // any other variadic arguments).
5240  Ops.insert(std::next(Ops.begin()), AddTOC);
5241  } else if (CallOpc == PPCISD::CALL &&
5242  !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) {
5243  // Otherwise insert NOP for non-local calls.
5244  CallOpc = PPCISD::CALL_NOP;
5245  }
5246  }
5247 
5248  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
5249  InFlag = Chain.getValue(1);
5250 
5251  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5252  DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5253  InFlag, dl);
5254  if (!Ins.empty())
5255  InFlag = Chain.getValue(1);
5256 
5257  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
5258  Ins, dl, DAG, InVals);
5259 }
5260 
5261 SDValue
5262 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5263  SmallVectorImpl<SDValue> &InVals) const {
5264  SelectionDAG &DAG = CLI.DAG;
5265  SDLoc &dl = CLI.DL;
5266  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5267  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5268  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5269  SDValue Chain = CLI.Chain;
5270  SDValue Callee = CLI.Callee;
5271  bool &isTailCall = CLI.IsTailCall;
5272  CallingConv::ID CallConv = CLI.CallConv;
5273  bool isVarArg = CLI.IsVarArg;
5274  bool isPatchPoint = CLI.IsPatchPoint;
5275  ImmutableCallSite CS = CLI.CS;
5276 
5277  if (isTailCall) {
5278  if (Subtarget.useLongCalls() && !(CS && CS.isMustTailCall()))
5279  isTailCall = false;
5280  else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5281  isTailCall =
5282  IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
5283  isVarArg, Outs, Ins, DAG);
5284  else
5285  isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5286  Ins, DAG);
5287  if (isTailCall) {
5288  ++NumTailCalls;
5289  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5290  ++NumSiblingCalls;
5291 
5292  assert(isa<GlobalAddressSDNode>(Callee) &&
5293  "Callee should be an llvm::Function object.");
5294  LLVM_DEBUG(
5295  const GlobalValue *GV =
5296  cast<GlobalAddressSDNode>(Callee)->getGlobal();
5297  const unsigned Width =
5298  80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
5299  dbgs() << "TCO caller: "
5300  << left_justify(DAG.getMachineFunction().getName(), Width)
5301  << ", callee linkage: " << GV->getVisibility() << ", "
5302  << GV->getLinkage() << "\n");
5303  }
5304  }
5305 
5306  if (!isTailCall && CS && CS.isMustTailCall())
5307  report_fatal_error("failed to perform tail call elimination on a call "
5308  "site marked musttail");
5309 
5310  // When long calls (i.e. indirect calls) are always used, calls are always
5311  // made via function pointer. If we have a function name, first translate it
5312  // into a pointer.
5313  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5314  !isTailCall)
5315  Callee = LowerGlobalAddress(Callee, DAG);
5316 
5317  if (Subtarget.isSVR4ABI()) {
5318  if (Subtarget.isPPC64())
5319  return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
5320  isTailCall, isPatchPoint, Outs, OutVals, Ins,
5321  dl, DAG, InVals, CS);
5322  else
5323  return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
5324  isTailCall, isPatchPoint, Outs, OutVals, Ins,
5325  dl, DAG, InVals, CS);
5326  }
5327 
5328  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
5329  isTailCall, isPatchPoint, Outs, OutVals, Ins,
5330  dl, DAG, InVals, CS);
5331 }
5332 
5333 SDValue PPCTargetLowering::LowerCall_32SVR4(
5334  SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5335  bool isTailCall, bool isPatchPoint,
5336  const SmallVectorImpl<ISD::OutputArg> &Outs,
5337  const SmallVectorImpl<SDValue> &OutVals,
5338  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5339  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5340  ImmutableCallSite CS) const {
5341  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5342  // of the 32-bit SVR4 ABI stack frame layout.
5343 
5344  assert((CallConv == CallingConv::C ||
5345  CallConv == CallingConv::Cold ||
5346  CallConv == CallingConv::Fast) && "Unknown calling convention!");
5347 
5348  unsigned PtrByteSize = 4;
5349 
5350  MachineFunction &MF = DAG.getMachineFunction();
5351 
5352  // Mark this function as potentially containing a tail call. As a
5353  // consequence the frame pointer will be used for dynamic allocas and for
5354  // restoring the caller's stack pointer in this function's epilogue. This is
5355  // done because a tail-called function might overwrite the value in this
5356  // function's (MF) stack pointer stack slot 0(SP).
5357  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5358  CallConv == CallingConv::Fast)
5359  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5360 
5361  // Count how many bytes are to be pushed on the stack, including the linkage
5362  // area, parameter list area and the part of the local variable space which
5363  // contains copies of aggregates which are passed by value.
5364 
5365  // Assign locations to all of the outgoing arguments.
5366  SmallVector<CCValAssign, 16> ArgLocs;
5367  PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
5368 
5369  // Reserve space for the linkage area on the stack.
5370  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5371  PtrByteSize);
5372  if (useSoftFloat())
5373  CCInfo.PreAnalyzeCallOperands(Outs);
5374 
5375  if (isVarArg) {
5376  // Handle fixed and variable vector arguments differently.
5377  // Fixed vector arguments go into registers as long as registers are
5378  // available. Variable vector arguments always go into memory.
5379  unsigned NumArgs = Outs.size();
5380 
5381  for (unsigned i = 0; i != NumArgs; ++i) {
5382  MVT ArgVT = Outs[i].VT;
5383  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5384  bool Result;
5385 
5386  if (Outs[i].IsFixed) {
5387  Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5388  CCInfo);
5389  } else {
5390  Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5391  ArgFlags, CCInfo);
5392  }
5393 
5394  if (Result) {
5395 #ifndef NDEBUG
5396  errs() << "Call operand #" << i << " has unhandled type "
5397  << EVT(ArgVT).getEVTString() << "\n";
5398 #endif
5399  llvm_unreachable(nullptr);
5400  }
5401  }
5402  } else {
5403  // All arguments are treated the same.
5404  CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5405  }
5406  CCInfo.clearWasPPCF128();
5407 
5408  // Assign locations to all of the outgoing aggregate by value arguments.
5409  SmallVector<CCValAssign, 16> ByValArgLocs;
5410  CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());
5411 
5412  // Reserve stack space for the allocations in CCInfo.
5413  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
5414 
5415  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5416 
5417  // Size of the linkage area, parameter list area and the part of the local
5418  // space variable where copies of aggregates which are passed by value are
5419  // stored.
5420  unsigned NumBytes = CCByValInfo.getNextStackOffset();
5421 
5422  // Calculate by how many bytes the stack has to be adjusted in case of tail
5423  // call optimization.
5424  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5425 
5426  // Adjust the stack pointer for the new arguments...
5427  // These operations are automatically eliminated by the prolog/epilog pass
5428  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5429  SDValue CallSeqStart = Chain;
5430 
5431  // Load the return address and frame pointer so they can be moved somewhere
5432  // else later.
5433  SDValue LROp, FPOp;
5434  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5435 
5436  // Set up a copy of the stack pointer for use loading and storing any
5437  // arguments that may not fit in the registers available for argument
5438  // passing.
5439  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5440 
5441  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5442  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5443  SmallVector<SDValue, 8> MemOpChains;
5444 
5445  bool seenFloatArg = false;
5446  // Walk the register/memloc assignments, inserting copies/loads.
5447  for (unsigned i = 0, j = 0, e = ArgLocs.size();
5448  i != e;
5449  ++i) {
5450  CCValAssign &VA = ArgLocs[i];
5451  SDValue Arg = OutVals[i];
5452  ISD::ArgFlagsTy Flags = Outs[i].Flags;
5453 
5454  if (Flags.isByVal()) {
5455  // Argument is an aggregate which is passed by value, thus we need to
5456  // create a copy of it in the local variable space of the current stack
5457  // frame (which is the stack frame of the caller) and pass the address of
5458  // this copy to the callee.
5459  assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5460  CCValAssign &ByValVA = ByValArgLocs[j++];
5461  assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5462 
5463  // Memory reserved in the local variable space of the callers stack frame.
5464  unsigned LocMemOffset = ByValVA.getLocMemOffset();
5465 
5466  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5467  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5468  StackPtr, PtrOff);
5469 
5470  // Create a copy of the argument in the local area of the current
5471  // stack frame.
5472  SDValue MemcpyCall =
5473  CreateCopyOfByValArgument(Arg, PtrOff,
5474  CallSeqStart.getNode()->getOperand(0),
5475  Flags, DAG, dl);
5476 
5477  // This must go outside the CALLSEQ_START..END.
5478  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5479  SDLoc(MemcpyCall));
5480  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5481  NewCallSeqStart.getNode());
5482  Chain = CallSeqStart = NewCallSeqStart;
5483 
5484  // Pass the address of the aggregate copy on the stack either in a
5485  // physical register or in the parameter list area of the current stack
5486  // frame to the callee.
5487  Arg = PtrOff;
5488  }
5489 
5490  // When useCRBits() is true, there can be i1 arguments.
5491  // It is because getRegisterType(MVT::i1) => MVT::i1,
5492  // and for other integer types getRegisterType() => MVT::i32.
5493  // Extend i1 and ensure callee will get i32.
5494  if (Arg.getValueType() == MVT::i1)
5495  Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5496  dl, MVT::i32, Arg);
5497 
5498  if (VA.isRegLoc()) {
5499  seenFloatArg |= VA.getLocVT().isFloatingPoint();
5500  // Put argument in a physical register.
5501  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5502  } else {
5503  // Put argument in the parameter list area of the current stack frame.
5504  assert(VA.isMemLoc());
5505  unsigned LocMemOffset = VA.getLocMemOffset();
5506 
5507  if (!isTailCall) {
5508  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5509  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5510  StackPtr, PtrOff);
5511 
5512  MemOpChains.push_back(
5513  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5514  } else {
5515  // Calculate and remember argument location.
5516  CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5517  TailCallArguments);
5518  }
5519  }
5520  }
5521 
5522  if (!MemOpChains.empty())
5523  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5524 
5525  // Build a sequence of copy-to-reg nodes chained together with token chain
5526  // and flag operands which copy the outgoing args into the appropriate regs.
5527  SDValue InFlag;
5528  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5529  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5530  RegsToPass[i].second, InFlag);
5531  InFlag = Chain.getValue(1);
5532  }
5533 
5534  // Set CR bit 6 to true if this is a vararg call with floating args passed in
5535  // registers.
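 // Illustrative example (not part of the lowering itself): for a call such
 // as printf("%f\n", x) on 32-bit SVR4, x is passed in an FPR, so CR bit 6
 // must be set so the callee's va_arg machinery knows FP registers were
 // used; for printf("%d\n", n) it is cleared.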
5536  if (isVarArg) {
5537  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5538  SDValue Ops[] = { Chain, InFlag };
5539 
5540  Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5541  dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5542 
5543  InFlag = Chain.getValue(1);
5544  }
5545 
5546  if (isTailCall)
5547  PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5548  TailCallArguments);
5549 
5550  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
5551  /* unused except on PPC64 ELFv1 */ false, DAG,
5552  RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
5553  NumBytes, Ins, InVals, CS);
5554 }
5555 
5556 // Copy an argument into memory, being careful to do this outside the
5557 // call sequence for the call to which the argument belongs.
5558 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5559  SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5560  SelectionDAG &DAG, const SDLoc &dl) const {
5561  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5562  CallSeqStart.getNode()->getOperand(0),
5563  Flags, DAG, dl);
5564  // The MEMCPY must go outside the CALLSEQ_START..END.
5565  int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5566  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5567  SDLoc(MemcpyCall));
5568  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5569  NewCallSeqStart.getNode());
5570  return NewCallSeqStart;
5571 }
5572 
5573 SDValue PPCTargetLowering::LowerCall_64SVR4(
5574  SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5575  bool isTailCall, bool isPatchPoint,
5576  const SmallVectorImpl<ISD::OutputArg> &Outs,
5577  const SmallVectorImpl<SDValue> &OutVals,
5578  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5579  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5580  ImmutableCallSite CS) const {
5581  bool isELFv2ABI = Subtarget.isELFv2ABI();
5582  bool isLittleEndian = Subtarget.isLittleEndian();
5583  unsigned NumOps = Outs.size();
5584  bool hasNest = false;
5585  bool IsSibCall = false;
5586 
5587  EVT PtrVT = getPointerTy(DAG.getDataLayout());
5588  unsigned PtrByteSize = 8;
5589 
5590  MachineFunction &MF = DAG.getMachineFunction();
5591 
5592  if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5593  IsSibCall = true;
5594 
5595  // Mark this function as potentially containing a tail call. As a
5596  // consequence the frame pointer will be used for dynamic allocas and for
5597  // restoring the caller's stack pointer in this function's epilogue. This is
5598  // done because a tail-called function might overwrite the value in this
5599  // function's (MF) stack pointer stack slot 0(SP).
5600  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5601  CallConv == CallingConv::Fast)
5602  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5603 
5604  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
5605  "fastcc not supported on varargs functions");
5606 
5607  // Count how many bytes are to be pushed on the stack, including the linkage
5608  // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5609  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5610  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5611  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5612  unsigned NumBytes = LinkageSize;
5613  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5614  unsigned &QFPR_idx = FPR_idx;
5615 
5616  static const MCPhysReg GPR[] = {
5617  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5618  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5619  };
5620  static const MCPhysReg VR[] = {
5621  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5622  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5623  };
5624 
5625  const unsigned NumGPRs = array_lengthof(GPR);
5626  const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5627  const unsigned NumVRs = array_lengthof(VR);
5628  const unsigned NumQFPRs = NumFPRs;
5629 
5630  // On ELFv2, we can avoid allocating the parameter area if all the arguments
5631  // can be passed to the callee in registers.
5632  // For the fast calling convention, there is another check below.
5633  // Note: We should keep consistent with LowerFormalArguments_64SVR4()
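 // For illustration (an assumed example, not checked here): a call like
 // foo(long a, double b) passes a in X3 and b in FPR1, so on ELFv2 no
 // parameter save area is needed and NumBytes can stay at the 32-byte
 // linkage area.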
5634  bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
5635  if (!HasParameterArea) {
5636  unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5637  unsigned AvailableFPRs = NumFPRs;
5638  unsigned AvailableVRs = NumVRs;
5639  unsigned NumBytesTmp = NumBytes;
5640  for (unsigned i = 0; i != NumOps; ++i) {
5641  if (Outs[i].Flags.isNest()) continue;
5642  if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5643  PtrByteSize, LinkageSize, ParamAreaSize,
5644  NumBytesTmp, AvailableFPRs, AvailableVRs,
5645  Subtarget.hasQPX()))
5646  HasParameterArea = true;
5647  }
5648  }
5649 
5650  // When using the fast calling convention, we don't provide backing for
5651  // arguments that will be in registers.
5652  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5653 
5654  // Avoid allocating parameter area for fastcc functions if all the arguments
5655  // can be passed in the registers.
5656  if (CallConv == CallingConv::Fast)
5657  HasParameterArea = false;
5658 
5659  // Add up all the space actually used.
5660  for (unsigned i = 0; i != NumOps; ++i) {
5661  ISD::ArgFlagsTy Flags = Outs[i].Flags;
5662  EVT ArgVT = Outs[i].VT;
5663  EVT OrigVT = Outs[i].ArgVT;
5664 
5665  if (Flags.isNest())
5666  continue;
5667 
5668  if (CallConv == CallingConv::Fast) {
5669  if (Flags.isByVal()) {
5670  NumGPRsUsed += (Flags.getByValSize()+7)/8;
5671  if (NumGPRsUsed > NumGPRs)
5672  HasParameterArea = true;
5673  } else {
5674  switch (ArgVT.getSimpleVT().SimpleTy) {
5675  default: llvm_unreachable("Unexpected ValueType for argument!");
5676  case MVT::i1:
5677  case MVT::i32:
5678  case MVT::i64:
5679  if (++NumGPRsUsed <= NumGPRs)
5680  continue;
5681  break;
5682  case MVT::v4i32:
5683  case MVT::v8i16:
5684  case MVT::v16i8:
5685  case MVT::v2f64:
5686  case MVT::v2i64:
5687  case MVT::v1i128:
5688  case MVT::f128:
5689  if (++NumVRsUsed <= NumVRs)
5690  continue;
5691  break;
5692  case MVT::v4f32:
5693  // When using QPX, this is handled like a FP register, otherwise, it
5694  // is an Altivec register.
5695  if (Subtarget.hasQPX()) {
5696  if (++NumFPRsUsed <= NumFPRs)
5697  continue;
5698  } else {
5699  if (++NumVRsUsed <= NumVRs)
5700  continue;
5701  }
5702  break;
5703  case MVT::f32:
5704  case MVT::f64:
5705  case MVT::v4f64: // QPX
5706  case MVT::v4i1: // QPX
5707  if (++NumFPRsUsed <= NumFPRs)
5708  continue;
5709  break;
5710  }
5711  HasParameterArea = true;
5712  }
5713  }
5714 
5715  /* Respect alignment of argument on the stack. */
5716  unsigned Align =
5717  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5718  NumBytes = ((NumBytes + Align - 1) / Align) * Align;
5719 
5720  NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5721  if (Flags.isInConsecutiveRegsLast())
5722  NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5723  }
5724 
5725  unsigned NumBytesActuallyUsed = NumBytes;
5726 
5727  // In the old ELFv1 ABI,
5728  // the prolog code of the callee may store up to 8 GPR argument registers to
5729  // the stack, allowing va_start to index over them in memory if it is varargs.
5730  // Because we cannot tell if this is needed on the caller side, we have to
5731  // conservatively assume that it is needed. As such, make sure we have at
5732  // least enough stack space for the caller to store the 8 GPRs.
5733  // In the ELFv2 ABI, we allocate the parameter area iff a callee
5734  // really requires memory operands, e.g. a vararg function.
5735  if (HasParameterArea)
5736  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5737  else
5738  NumBytes = LinkageSize;
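 // Worked example (illustrative): on ELFv1 with an 8-byte pointer, any call
 // that needs a parameter area reserves at least 48 + 8 * 8 = 112 bytes
 // here; on ELFv2 a register-only call keeps NumBytes at the 32-byte
 // linkage area.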
5739 
5740  // Tail call needs the stack to be aligned.
5741  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5742  CallConv == CallingConv::Fast)
5743  NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5744 
5745  int SPDiff = 0;
5746 
5747  // Calculate by how many bytes the stack has to be adjusted in case of tail
5748  // call optimization.
5749  if (!IsSibCall)
5750  SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5751 
5752  // To protect arguments on the stack from being clobbered in a tail call,
5753  // force all the loads to happen before doing any other lowering.
5754  if (isTailCall)
5755  Chain = DAG.getStackArgumentTokenFactor(Chain);
5756 
5757  // Adjust the stack pointer for the new arguments...
5758  // These operations are automatically eliminated by the prolog/epilog pass
5759  if (!IsSibCall)
5760  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5761  SDValue CallSeqStart = Chain;
5762 
5763  // Load the return address and frame pointer so they can be moved somewhere
5764  // else later.
5765  SDValue LROp, FPOp;
5766  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5767 
5768  // Set up a copy of the stack pointer for use loading and storing any
5769  // arguments that may not fit in the registers available for argument
5770  // passing.
5771  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5772 
5773  // Figure out which arguments are going to go in registers, and which in
5774  // memory. Also, if this is a vararg function, floating point operations
5775  // must be stored to our stack, and loaded into integer regs as well, if
5776  // any integer regs are available for argument passing.
5777  unsigned ArgOffset = LinkageSize;
5778 
5779  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5780  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5781 
5782  SmallVector<SDValue, 8> MemOpChains;
5783  for (unsigned i = 0; i != NumOps; ++i) {
5784  SDValue Arg = OutVals[i];
5785  ISD::ArgFlagsTy Flags = Outs[i].Flags;
5786  EVT ArgVT = Outs[i].VT;
5787  EVT OrigVT = Outs[i].ArgVT;
5788 
5789  // PtrOff will be used to store the current argument to the stack if a
5790  // register cannot be found for it.
5791  SDValue PtrOff;
5792 
5793  // We re-align the argument offset for each argument, except when using the
5794  // fast calling convention, when we need to make sure we do that only when
5795  // we'll actually use a stack slot.
5796  auto ComputePtrOff = [&]() {
5797  /* Respect alignment of argument on the stack. */
5798  unsigned Align =
5799  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5800  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
5801 
5802  PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5803 
5804  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5805  };
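 // Illustrative calculation: if ArgOffset is 40 and the next argument is a
 // 16-byte Altivec vector, CalculateStackSlotAlignment returns 16 and the
 // offset is rounded up to ((40 + 15) / 16) * 16 = 48 before the slot
 // address is formed.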
5806 
5807  if (CallConv != CallingConv::Fast) {
5808  ComputePtrOff();
5809 
5810  /* Compute GPR index associated with argument offset. */
5811  GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5812  GPR_idx = std::min(GPR_idx, NumGPRs);
5813  }
5814 
5815  // Promote integers to 64-bit values.
5816  if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5817  // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5818  unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5819  Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5820  }
5821 
5822  // FIXME memcpy is used way more than necessary. Correctness first.
5823  // Note: "by value" is code for passing a structure by value, not
5824  // basic types.
5825  if (Flags.isByVal()) {
5826  // Note: Size includes alignment padding, so
5827  // struct x { short a; char b; }
5828  // will have Size = 4. With #pragma pack(1), it will have Size = 3.
5829  // These are the proper values we need for right-justifying the
5830  // aggregate in a parameter register.
5831  unsigned Size = Flags.getByValSize();
5832 
5833  // An empty aggregate parameter takes up no storage and no
5834  // registers.
5835  if (Size == 0)
5836  continue;
5837 
5838  if (CallConv == CallingConv::Fast)
5839  ComputePtrOff();
5840 
5841  // All aggregates smaller than 8 bytes must be passed right-justified.
5842  if (Size==1 || Size==2 || Size==4) {
5843  EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5844  if (GPR_idx != NumGPRs) {
5845  SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5846  MachinePointerInfo(), VT);
5847  MemOpChains.push_back(Load.getValue(1));
5848  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5849 
5850  ArgOffset += PtrByteSize;
5851  continue;
5852  }
5853  }
5854 
5855  if (GPR_idx == NumGPRs && Size < 8) {
5856  SDValue AddPtr = PtrOff;
5857  if (!isLittleEndian) {
5858  SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5859  PtrOff.getValueType());
5860  AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5861  }
5862  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5863  CallSeqStart,
5864  Flags, DAG, dl);
5865  ArgOffset += PtrByteSize;
5866  continue;
5867  }
5868  // Copy entire object into memory. There are cases where gcc-generated
5869  // code assumes it is there, even if it could be put entirely into
5870  // registers. (This is not what the doc says.)
5871 
5872  // FIXME: The above statement is likely due to a misunderstanding of the
5873  // documents. All arguments must be copied into the parameter area BY
5874  // THE CALLEE in the event that the callee takes the address of any
5875  // formal argument. That has not yet been implemented. However, it is
5876  // reasonable to use the stack area as a staging area for the register
5877  // load.
5878 
5879  // Skip this for small aggregates, as we will use the same slot for a
5880  // right-justified copy, below.
5881  if (Size >= 8)
5882  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5883  CallSeqStart,
5884  Flags, DAG, dl);
5885 
5886  // When a register is available, pass a small aggregate right-justified.
5887  if (Size < 8 && GPR_idx != NumGPRs) {
5888  // The easiest way to get this right-justified in a register
5889  // is to copy the structure into the rightmost portion of a
5890  // local variable slot, then load the whole slot into the
5891  // register.
5892  // FIXME: The memcpy seems to produce pretty awful code for
5893  // small aggregates, particularly for packed ones.
5894  // FIXME: It would be preferable to use the slot in the
5895  // parameter save area instead of a new local variable.
5896  SDValue AddPtr = PtrOff;
5897  if (!isLittleEndian) {
5898  SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5899  AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5900  }
5901  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5902  CallSeqStart,
5903  Flags, DAG, dl);
5904 
5905  // Load the slot into the register.
5906  SDValue Load =
5907  DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
5908  MemOpChains.push_back(Load.getValue(1));
5909  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5910 
5911  // Done with this argument.
5912  ArgOffset += PtrByteSize;
5913  continue;
5914  }
5915 
5916  // For aggregates larger than PtrByteSize, copy the pieces of the
5917  // object that fit into registers from the parameter save area.
5918  for (unsigned j=0; j<Size; j+=PtrByteSize) {
5919  SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5920  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5921  if (GPR_idx != NumGPRs) {
5922  SDValue Load =
5923  DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
5924  MemOpChains.push_back(Load.getValue(1));
5925  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5926  ArgOffset += PtrByteSize;
5927  } else {
5928  ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5929  break;
5930  }
5931  }
5932  continue;
5933  }
5934 
5935  switch (Arg.getSimpleValueType().SimpleTy) {
5936  default: llvm_unreachable("Unexpected ValueType for argument!");
5937  case MVT::i1:
5938  case MVT::i32:
5939  case MVT::i64:
5940  if (Flags.isNest()) {
5941  // The 'nest' parameter, if any, is passed in R11.
5942  RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
5943  hasNest = true;
5944  break;
5945  }
5946 
5947  // These can be scalar arguments or elements of an integer array type
5948  // passed directly. Clang may use those instead of "byval" aggregate
5949  // types to avoid forcing arguments to memory unnecessarily.
5950  if (GPR_idx != NumGPRs) {
5951  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5952  } else {
5953  if (CallConv == CallingConv::Fast)
5954  ComputePtrOff();
5955 
5956  assert(HasParameterArea &&
5957  "Parameter area must exist to pass an argument in memory.");
5958  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5959  true, isTailCall, false, MemOpChains,
5960  TailCallArguments, dl);
5961  if (CallConv == CallingConv::Fast)
5962  ArgOffset += PtrByteSize;
5963  }
5964  if (CallConv != CallingConv::Fast)
5965  ArgOffset += PtrByteSize;
5966  break;
5967  case MVT::f32:
5968  case MVT::f64: {
5969  // These can be scalar arguments or elements of a float array type
5970  // passed directly. The latter are used to implement ELFv2 homogenous
5971  // float aggregates.
5972 
5973  // Named arguments go into FPRs first, and once they overflow, the
5974  // remaining arguments go into GPRs and then the parameter save area.
5975  // Unnamed arguments for vararg functions always go to GPRs and
5976  // then the parameter save area. For now, put all arguments to vararg
5977  // routines always in both locations (FPR *and* GPR or stack slot).
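 // Hedged example: for a varargs call like printf("%f\n", d), d goes into
 // FPR1 and is also stored to its parameter-area doubleword (and loaded
 // into a GPR if one is free), so the callee can retrieve it either way.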
5978  bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
5979  bool NeededLoad = false;
5980 
5981  // First load the argument into the next available FPR.
5982  if (FPR_idx != NumFPRs)
5983  RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
5984 
5985  // Next, load the argument into GPR or stack slot if needed.
5986  if (!NeedGPROrStack)
5987  ;
5988  else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
5989  // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
5990  // once we support fp <-> gpr moves.
5991 
5992  // In the non-vararg case, this can only ever happen in the
5993  // presence of f32 array types, since otherwise we never run
5994  // out of FPRs before running out of GPRs.
5995  SDValue ArgVal;
5996 
5997  // Double values are always passed in a single GPR.
5998  if (Arg.getValueType() != MVT::f32) {
5999  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6000 
6001  // Non-array float values are extended and passed in a GPR.
6002  } else if (!Flags.isInConsecutiveRegs()) {
6003  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6004  ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6005 
6006  // If we have an array of floats, we collect every odd element
6007  // together with its predecessor into one GPR.
6008  } else if (ArgOffset % PtrByteSize != 0) {
6009  SDValue Lo, Hi;
6010  Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6011  Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6012  if (!isLittleEndian)
6013  std::swap(Lo, Hi);
6014  ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6015 
6016  // The final element, if even, goes into the first half of a GPR.
6017  } else if (Flags.isInConsecutiveRegsLast()) {
6018  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6019  ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6020  if (!isLittleEndian)
6021  ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6022  DAG.getConstant(32, dl, MVT::i32));
6023 
6024  // Non-final even elements are skipped; they will be handled
6025  // together with the subsequent argument on the next go-around.
6026  } else
6027  ArgVal = SDValue();
6028 
6029  if (ArgVal.getNode())
6030  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6031  } else {
6032  if (CallConv == CallingConv::Fast)
6033  ComputePtrOff();
6034 
6035  // Single-precision floating-point values are mapped to the
6036  // second (rightmost) word of the stack doubleword.
6037  if (Arg.getValueType() == MVT::f32 &&
6038  !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6039  SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6040  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6041  }
6042 
6043  assert(HasParameterArea &&
6044  "Parameter area must exist to pass an argument in memory.");
6045  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6046  true, isTailCall, false, MemOpChains,
6047  TailCallArguments, dl);
6048 
6049  NeededLoad = true;
6050  }
6051  // When passing an array of floats, the array occupies consecutive
6052  // space in the argument area; only round up to the next doubleword
6053  // at the end of the array. Otherwise, each float takes 8 bytes.
6054  if (CallConv != CallingConv::Fast || NeededLoad) {
6055  ArgOffset += (Arg.getValueType() == MVT::f32 &&
6056  Flags.isInConsecutiveRegs()) ? 4 : 8;
6057  if (Flags.isInConsecutiveRegsLast())
6058  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6059  }
6060  break;
6061  }
6062  case MVT::v4f32:
6063  case MVT::v4i32:
6064  case MVT::v8i16:
6065  case MVT::v16i8:
6066  case MVT::v2f64:
6067  case MVT::v2i64:
6068  case MVT::v1i128:
6069  case MVT::f128:
6070  if (!Subtarget.hasQPX()) {
6071  // These can be scalar arguments or elements of a vector array type
6072  // passed directly. The latter are used to implement ELFv2 homogenous
6073  // vector aggregates.
6074 
6075  // For a varargs call, named arguments go into VRs or on the stack as
6076  // usual; unnamed arguments always go to the stack or the corresponding
6077  // GPRs when within range. For now, we always put the value in both
6078  // locations (or even all three).
6079  if (isVarArg) {
6080  assert(HasParameterArea &&
6081  "Parameter area must exist if we have a varargs call.");
6082  // We could elide this store in the case where the object fits
6083  // entirely in R registers. Maybe later.
6084  SDValue Store =
6085  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6086  MemOpChains.push_back(Store);
6087  if (VR_idx != NumVRs) {
6088  SDValue Load =
6089  DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6090  MemOpChains.push_back(Load.getValue(1));
6091  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6092  }
6093  ArgOffset += 16;
6094  for (unsigned i=0; i<16; i+=PtrByteSize) {
6095  if (GPR_idx == NumGPRs)
6096  break;
6097  SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6098  DAG.getConstant(i, dl, PtrVT));
6099  SDValue Load =
6100  DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6101  MemOpChains.push_back(Load.getValue(1));
6102  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6103  }
6104  break;
6105  }
6106 
6107  // Non-varargs Altivec params go into VRs or on the stack.
6108  if (VR_idx != NumVRs) {
6109  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6110  } else {
6111  if (CallConv == CallingConv::Fast)
6112  ComputePtrOff();
6113 
6114  assert(HasParameterArea &&
6115  "Parameter area must exist to pass an argument in memory.");
6116  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6117  true, isTailCall, true, MemOpChains,
6118  TailCallArguments, dl);
6119  if (CallConv == CallingConv::Fast)
6120  ArgOffset += 16;
6121  }
6122 
6123  if (CallConv != CallingConv::Fast)
6124  ArgOffset += 16;
6125  break;
6126  } // not QPX
6127 
6129  "Invalid QPX parameter type");
6130 
6132  case MVT::v4f64:
6133  case MVT::v4i1: {
6134  bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
6135  if (isVarArg) {
6136  assert(HasParameterArea &&
6137  "Parameter area must exist if we have a varargs call.");
6138  // We could elide this store in the case where the object fits
6139  // entirely in R registers. Maybe later.
6140  SDValue Store =
6141  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6142  MemOpChains.push_back(Store);
6143  if (QFPR_idx != NumQFPRs) {
6144  SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
6145  PtrOff, MachinePointerInfo());
6146  MemOpChains.push_back(Load.getValue(1));
6147  RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
6148  }
6149  ArgOffset += (IsF32 ? 16 : 32);
6150  for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
6151  if (GPR_idx == NumGPRs)
6152  break;
6153  SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6154  DAG.getConstant(i, dl, PtrVT));
6155  SDValue Load =
6156  DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6157  MemOpChains.push_back(Load.getValue(1));
6158  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6159  }
6160  break;
6161  }
6162 
6163  // Non-varargs QPX params go into registers or on the stack.
6164  if (QFPR_idx != NumQFPRs) {
6165  RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
6166  } else {
6167  if (CallConv == CallingConv::Fast)
6168  ComputePtrOff();
6169 
6170  assert(HasParameterArea &&
6171  "Parameter area must exist to pass an argument in memory.");
6172  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6173  true, isTailCall, true, MemOpChains,
6174  TailCallArguments, dl);
6175  if (CallConv == CallingConv::Fast)
6176  ArgOffset += (IsF32 ? 16 : 32);
6177  }
6178 
6179  if (CallConv != CallingConv::Fast)
6180  ArgOffset += (IsF32 ? 16 : 32);
6181  break;
6182  }
6183  }
6184  }
6185 
6186  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6187  "mismatch in size of parameter area");
6188  (void)NumBytesActuallyUsed;
6189 
6190  if (!MemOpChains.empty())
6191  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6192 
6193  // Check if this is an indirect call (MTCTR/BCTRL).
6194  // See PrepareCall() for more information about calls through function
6195  // pointers in the 64-bit SVR4 ABI.
6196  if (!isTailCall && !isPatchPoint &&
6197  !isFunctionGlobalAddress(Callee) &&
6198  !isa<ExternalSymbolSDNode>(Callee)) {
6199  // Load r2 into a virtual register and store it to the TOC save area.
6200  setUsesTOCBasePtr(DAG);
6201  SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6202  // TOC save area offset.
6203  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6204  SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6205  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6206  Chain = DAG.getStore(
6207  Val.getValue(1), dl, Val, AddPtr,
6208  MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
6209  // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6210  // This does not mean the MTCTR instruction must use R12; it's easier
6211  // to model this as an extra parameter, so do that.
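 // Background sketch (assumed ELFv2 convention): the callee's global entry
 // point typically recomputes its own TOC from R12, e.g.
 //   addis r2, r12, (.TOC.-entry)@ha
 //   addi  r2, r2,  (.TOC.-entry)@l
 // which is why R12 must hold the callee address at the point of the call.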
6212  if (isELFv2ABI && !isPatchPoint)
6213  RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6214  }
6215 
6216  // Build a sequence of copy-to-reg nodes chained together with token chain
6217  // and flag operands which copy the outgoing args into the appropriate regs.
6218  SDValue InFlag;
6219  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6220  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6221  RegsToPass[i].second, InFlag);
6222  InFlag = Chain.getValue(1);
6223  }
6224 
6225  if (isTailCall && !IsSibCall)
6226  PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6227  TailCallArguments);
6228 
6229  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
6230  DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
6231  SPDiff, NumBytes, Ins, InVals, CS);
6232 }
6233 
6234 SDValue PPCTargetLowering::LowerCall_Darwin(
6235  SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
6236  bool isTailCall, bool isPatchPoint,
6237  const SmallVectorImpl<ISD::OutputArg> &Outs,
6238  const SmallVectorImpl<SDValue> &OutVals,
6239  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6240  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6241  ImmutableCallSite CS) const {
6242  unsigned NumOps = Outs.size();
6243 
6244  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6245  bool isPPC64 = PtrVT == MVT::i64;
6246  unsigned PtrByteSize = isPPC64 ? 8 : 4;
6247 
6248  MachineFunction &MF = DAG.getMachineFunction();
6249 
6250  // Mark this function as potentially containing a tail call. As a
6251  // consequence the frame pointer will be used for dynamic allocas and for
6252  // restoring the caller's stack pointer in this function's epilogue. This is
6253  // done because a tail-called function might overwrite the value in this
6254  // function's (MF) stack pointer stack slot 0(SP).
6255  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6256  CallConv == CallingConv::Fast)
6257  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6258 
6259  // Count how many bytes are to be pushed on the stack, including the linkage
6260  // area, and parameter passing area. We start with 24/48 bytes, which is
6261  // prereserved space for [SP][CR][LR][3 x unused].
6262  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6263  unsigned NumBytes = LinkageSize;
6264 
6265  // Add up all the space actually used.
6266  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
6267  // they all go in registers, but we must reserve stack space for them for
6268  // possible use by the caller. In varargs or 64-bit calls, parameters are
6269  // assigned stack space in order, with padding so Altivec parameters are
6270  // 16-byte aligned.
6271  unsigned nAltivecParamsAtEnd = 0;
6272  for (unsigned i = 0; i != NumOps; ++i) {
6273  ISD::ArgFlagsTy Flags = Outs[i].Flags;
6274  EVT ArgVT = Outs[i].VT;
6275  // Varargs Altivec parameters are padded to a 16 byte boundary.
6276  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
6277  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
6278  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
6279  if (!isVarArg && !isPPC64) {
6280  // Non-varargs Altivec parameters go after all the non-Altivec
6281  // parameters; handle those later so we know how much padding we need.
6282  nAltivecParamsAtEnd++;
6283  continue;
6284  }
6285  // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
6286  NumBytes = ((NumBytes+15)/16)*16;
6287  }
6288  NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6289  }
6290 
6291  // Allow for Altivec parameters at the end, if needed.
6292  if (nAltivecParamsAtEnd) {
6293  NumBytes = ((NumBytes+15)/16)*16;
6294  NumBytes += 16*nAltivecParamsAtEnd;
6295  }
6296 
6297  // The prolog code of the callee may store up to 8 GPR argument registers to
6298  // the stack, allowing va_start to index over them in memory if it is varargs.
6299  // Because we cannot tell if this is needed on the caller side, we have to
6300  // conservatively assume that it is needed. As such, make sure we have at
6301  // least enough stack space for the caller to store the 8 GPRs.
6302  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6303 
6304  // Tail call needs the stack to be aligned.
6305  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6306  CallConv == CallingConv::Fast)
6307  NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6308 
6309  // Calculate by how many bytes the stack has to be adjusted in case of tail
6310  // call optimization.
6311  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
6312 
6313  // To protect arguments on the stack from being clobbered in a tail call,
6314  // force all the loads to happen before doing any other lowering.
6315  if (isTailCall)
6316  Chain = DAG.getStackArgumentTokenFactor(Chain);
6317 
6318  // Adjust the stack pointer for the new arguments...
6319  // These operations are automatically eliminated by the prolog/epilog pass
6320  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6321  SDValue CallSeqStart = Chain;
6322 
6323  // Load the return address and frame pointer so they can be moved somewhere
6324  // else later.
6325  SDValue LROp, FPOp;
6326  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6327 
6328  // Set up a copy of the stack pointer for use loading and storing any
6329  // arguments that may not fit in the registers available for argument
6330  // passing.
6331  SDValue StackPtr;
6332  if (isPPC64)
6333  StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6334  else
6335  StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6336 
6337  // Figure out which arguments are going to go in registers, and which in
6338  // memory. Also, if this is a vararg function, floating point operations
6339  // must be stored to our stack, and loaded into integer regs as well, if
6340  // any integer regs are available for argument passing.
6341  unsigned ArgOffset = LinkageSize;
6342  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6343 
6344  static const MCPhysReg GPR_32[] = { // 32-bit registers.
6345  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6346  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
6347  };
6348  static const MCPhysReg GPR_64[] = { // 64-bit registers.
6349  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6350  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6351  };
6352  static const MCPhysReg VR[] = {
6353  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6354  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6355  };
6356  const unsigned NumGPRs = array_lengthof(GPR_32);
6357  const unsigned NumFPRs = 13;
6358  const unsigned NumVRs = array_lengthof(VR);
6359 
6360  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6361 
6362  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6363  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6364 
6365  SmallVector<SDValue, 8> MemOpChains;
6366  for (unsigned i = 0; i != NumOps; ++i) {
6367  SDValue Arg = OutVals[i];
6368  ISD::ArgFlagsTy Flags = Outs[i].Flags;
6369 
6370  // PtrOff will be used to store the current argument to the stack if a
6371  // register cannot be found for it.
6372  SDValue PtrOff;
6373 
6374  PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6375 
6376  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6377 
6378  // On PPC64, promote integers to 64-bit values.
6379  if (isPPC64 && Arg.getValueType() == MVT::i32) {
6380  // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6381  unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6382  Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6383  }
6384 
6385  // FIXME memcpy is used way more than necessary. Correctness first.
6386  // Note: "by value" is code for passing a structure by value, not
6387  // basic types.
6388  if (Flags.isByVal()) {
6389  unsigned Size = Flags.getByValSize();
6390  // Very small objects are passed right-justified. Everything else is
6391  // passed left-justified.
6392  if (Size==1 || Size==2) {
6393  EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
6394  if (GPR_idx != NumGPRs) {
6395  SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6396  MachinePointerInfo(), VT);
6397  MemOpChains.push_back(Load.getValue(1));
6398  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6399 
6400  ArgOffset += PtrByteSize;
6401  } else {
6402  SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6403  PtrOff.getValueType());
6404  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6405  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6406  CallSeqStart,
6407  Flags, DAG, dl);
6408  ArgOffset += PtrByteSize;
6409  }
6410  continue;
6411  }
6412  // Copy entire object into memory. There are cases where gcc-generated
6413  // code assumes it is there, even if it could be put entirely into
6414  // registers. (This is not what the doc says.)
6415  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6416  CallSeqStart,
6417  Flags, DAG, dl);
6418 
6419  // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6420  // copy the pieces of the object that fit into registers from the
6421  // parameter save area.
6422  for (unsigned j=0; j<Size; j+=PtrByteSize) {
6423  SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6424  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6425  if (GPR_idx != NumGPRs) {
6426  SDValue Load =
6427  DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6428  MemOpChains.push_back(Load.getValue(1));
6429  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6430  ArgOffset += PtrByteSize;
6431  } else {
6432  ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6433  break;
6434  }
6435  }
6436  continue;
6437  }
6438 
6439  switch (Arg.getSimpleValueType().SimpleTy) {
6440  default: llvm_unreachable("Unexpected ValueType for argument!");
6441  case MVT::i1:
6442  case MVT::i32:
6443  case MVT::i64:
6444  if (GPR_idx != NumGPRs) {
6445  if (Arg.getValueType() == MVT::i1)
6446  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
6447 
6448  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6449  } else {
6450  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6451  isPPC64, isTailCall, false, MemOpChains,
6452  TailCallArguments, dl);
6453  }
6454  ArgOffset += PtrByteSize;
6455  break;
6456  case MVT::f32:
6457  case MVT::f64:
6458  if (FPR_idx != NumFPRs) {
6459  RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6460 
6461  if (isVarArg) {
6462  SDValue Store =
6463  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6464  MemOpChains.push_back(Store);
6465 
6466  // Float varargs are always shadowed in available integer registers
6467  if (GPR_idx != NumGPRs) {
6468  SDValue Load =
6469  DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6470  MemOpChains.push_back(Load.getValue(1));
6471  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6472  }
6473  if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
6474  SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6475  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6476  SDValue Load =
6477  DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6478  MemOpChains.push_back(Load.getValue(1));
6479  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6480  }
6481  } else {
6482  // If we have any FPRs remaining, we may also have GPRs remaining.
6483  // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6484  // GPRs.
6485  if (GPR_idx != NumGPRs)
6486  ++GPR_idx;
6487  if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
6488  !isPPC64) // PPC64 has 64-bit GPR's obviously :)
6489  ++GPR_idx;
6490  }
6491  } else
6492  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6493  isPPC64, isTailCall, false, MemOpChains,
6494  TailCallArguments, dl);
6495  if (isPPC64)
6496  ArgOffset += 8;
6497  else
6498  ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
6499  break;
6500  case MVT::v4f32:
6501  case MVT::v4i32:
6502  case MVT::v8i16:
6503  case MVT::v16i8:
6504  if (isVarArg) {
6505  // These go aligned on the stack, or in the corresponding R registers
6506  // when within range. The Darwin PPC ABI doc claims they also go in
6507  // V registers; in fact gcc does this only for arguments that are
6508  // prototyped, not for those that match the ellipsis (...). We do it for
6509  // all arguments; it seems to work.
6510  while (ArgOffset % 16 !=0) {
6511  ArgOffset += PtrByteSize;
6512  if (GPR_idx != NumGPRs)
6513  GPR_idx++;
6514  }
6515  // We could elide this store in the case where the object fits
6516  // entirely in R registers. Maybe later.
6517  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
6518  DAG.getConstant(ArgOffset, dl, PtrVT));
6519  SDValue Store =
6520  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6521  MemOpChains.push_back(Store);
6522  if (VR_idx != NumVRs) {
6523  SDValue Load =
6524  DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6525  MemOpChains.push_back(Load.getValue(1));
6526  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6527  }
6528  ArgOffset += 16;
6529  for (unsigned i=0; i<16; i+=PtrByteSize) {
6530  if (GPR_idx == NumGPRs)
6531  break;
6532  SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6533  DAG.getConstant(i, dl, PtrVT));
6534  SDValue Load =
6535  DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6536  MemOpChains.push_back(Load.getValue(1));
6537  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6538  }
6539  break;
6540  }
6541 
6542  // Non-varargs Altivec params generally go in registers, but have
6543  // stack space allocated at the end.
6544  if (VR_idx != NumVRs) {
6545  // Doesn't have GPR space allocated.
6546  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6547  } else if (nAltivecParamsAtEnd==0) {
6548  // We are emitting Altivec params in order.
6549  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6550  isPPC64, isTailCall, true, MemOpChains,
6551  TailCallArguments, dl);
6552  ArgOffset += 16;
6553  }
6554  break;
6555  }
6556  }
6557  // If all Altivec parameters fit in registers, as they usually do,
6558  // they get stack space following the non-Altivec parameters. We
6559  // don't track this here because nobody below needs it.
6560  // If there are more Altivec parameters than fit in registers emit
6561  // the stores here.
6562  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
6563  unsigned j = 0;
6564  // Offset is aligned; skip 1st 12 params which go in V registers.
6565  ArgOffset = ((ArgOffset+15)/16)*16;
6566  ArgOffset += 12*16;
6567  for (unsigned i = 0; i != NumOps; ++i) {
6568  SDValue Arg = OutVals[i];
6569  EVT ArgType = Outs[i].VT;
6570  if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
6571  ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
6572  if (++j > NumVRs) {
6573  SDValue PtrOff;
6574  // We are emitting Altivec params in order.
6575  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6576  isPPC64, isTailCall, true, MemOpChains,
6577  TailCallArguments, dl);
6578  ArgOffset += 16;
6579  }
6580  }
6581  }
6582  }
6583 
6584  if (!MemOpChains.empty())
6585  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6586 
6587  // On Darwin, R12 must contain the address of an indirect callee. This does
6588  // not mean the MTCTR instruction must use R12; it's easier to model this as
6589  // an extra parameter, so do that.
6590  if (!isTailCall &&
6591  !isFunctionGlobalAddress(Callee) &&
6592  !isa<ExternalSymbolSDNode>(Callee) &&
6593  !isBLACompatibleAddress(Callee, DAG))
6594  RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
6595  PPC::R12), Callee));
6596 
6597  // Build a sequence of copy-to-reg nodes chained together with token chain
6598  // and flag operands which copy the outgoing args into the appropriate regs.
6599  SDValue InFlag;
6600  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6601  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6602  RegsToPass[i].second, InFlag);
6603  InFlag = Chain.getValue(1);
6604  }
6605 
6606  if (isTailCall)
6607  PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6608  TailCallArguments);
6609 
6610  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
6611  /* unused except on PPC64 ELFv1 */ false, DAG,
6612  RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
6613  NumBytes, Ins, InVals, CS);
6614 }
6615 
6616 bool
6617 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
6618  MachineFunction &MF, bool isVarArg,
6619  const SmallVectorImpl<ISD::OutputArg> &Outs,
6620  LLVMContext &Context) const {
6621  SmallVector<CCValAssign, 16> RVLocs;
6622  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
6623  return CCInfo.CheckReturn(
6624  Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
6625  ? RetCC_PPC_Cold
6626  : RetCC_PPC);
6627 }
6628 
6629 SDValue
6630 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
6631  bool isVarArg,
6632  const SmallVectorImpl<ISD::OutputArg> &Outs,
6633  const SmallVectorImpl<SDValue> &OutVals,
6634  const SDLoc &dl, SelectionDAG &DAG) const {
6635  SmallVector<CCValAssign, 16> RVLocs;
6636  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
6637  *DAG.getContext());
6638  CCInfo.AnalyzeReturn(Outs,
6639  (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
6640  ? RetCC_PPC_Cold
6641  : RetCC_PPC);
6642 
6643  SDValue Flag;
6644  SmallVector<SDValue, 4> RetOps(1, Chain);
6645 
6646  // Copy the result values into the output registers.
6647  for (unsigned i = 0; i != RVLocs.size(); ++i) {
6648  CCValAssign &VA = RVLocs[i];
6649  assert(VA.isRegLoc() && "Can only return in registers!");
6650 
6651  SDValue Arg = OutVals[i];
6652 
6653  switch (VA.getLocInfo()) {
6654  default: llvm_unreachable("Unknown loc info!");
6655  case CCValAssign::Full: break;
6656  case CCValAssign::AExt:
6657  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
6658  break;
6659  case CCValAssign::ZExt:
6660  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
6661  break;
6662  case CCValAssign::SExt:
6663  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
6664  break;
6665  }
6666 
6667  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
6668  Flag = Chain.getValue(1);
6669  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
6670  }
6671 
6672  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
6673  const MCPhysReg *I =
6674  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
6675  if (I) {
6676  for (; *I; ++I) {
6677 
6678  if (PPC::G8RCRegClass.contains(*I))
6679  RetOps.push_back(DAG.getRegister(*I, MVT::i64));
6680  else if (PPC::F8RCRegClass.contains(*I))
6681  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
6682  else if (PPC::CRRCRegClass.contains(*I))
6683  RetOps.push_back(DAG.getRegister(*I, MVT::i1));
6684  else if (PPC::VRRCRegClass.contains(*I))
6685  RetOps.push_back(DAG.getRegister(*I, MVT::Other));
6686  else
6687  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6688  }
6689  }
6690 
6691  RetOps[0] = Chain; // Update chain.
6692 
6693  // Add the flag if we have it.
6694  if (Flag.getNode())
6695  RetOps.push_back(Flag);
6696 
6697  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
6698 }
6699 
6700 SDValue
6701 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
6702  SelectionDAG &DAG) const {
6703  SDLoc dl(Op);
6704 
6705  // Get the correct type for integers.
6706  EVT IntVT = Op.getValueType();
6707 
6708  // Get the inputs.
6709  SDValue Chain = Op.getOperand(0);
6710  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6711  // Build a DYNAREAOFFSET node.
6712  SDValue Ops[2] = {Chain, FPSIdx};
6713  SDVTList VTs = DAG.getVTList(IntVT);
6714  return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
6715 }
6716 
6717 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
6718  SelectionDAG &DAG) const {
6719  // When we pop the dynamic allocation we need to restore the SP link.
6720  SDLoc dl(Op);
6721 
6722  // Get the correct type for pointers.
6723  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6724 
6725  // Construct the stack pointer operand.
6726  bool isPPC64 = Subtarget.isPPC64();
6727  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
6728  SDValue StackPtr = DAG.getRegister(SP, PtrVT);
6729 
6730  // Get the operands for the STACKRESTORE.
6731  SDValue Chain = Op.getOperand(0);
6732  SDValue SaveSP = Op.getOperand(1);
6733 
6734  // Load the old link SP.
6735  SDValue LoadLinkSP =
6736  DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
6737 
6738  // Restore the stack pointer.
6739  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
6740 
6741  // Store the old link SP.
6742  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
6743 }
6744 
6745 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
6746  MachineFunction &MF = DAG.getMachineFunction();
6747  bool isPPC64 = Subtarget.isPPC64();
6748  EVT PtrVT = getPointerTy(MF.getDataLayout());
6749 
6750  // Get the current return address save index. The users of this index
6751  // are primarily the RETURNADDR lowering.
6752  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6753  int RASI = FI->getReturnAddrSaveIndex();
6754 
6755  // If the return address save index hasn't been defined yet.
6756  if (!RASI) {
6757  // Find out the fixed offset of the return address save area.
6758  int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
6759  // Allocate the frame index for the return address save area.
6760  RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
6761  // Save the result.
6762  FI->setReturnAddrSaveIndex(RASI);
6763  }
6764  return DAG.getFrameIndex(RASI, PtrVT);
6765 }
6766 
6767 SDValue
6768 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
6769  MachineFunction &MF = DAG.getMachineFunction();
6770  bool isPPC64 = Subtarget.isPPC64();
6771  EVT PtrVT = getPointerTy(MF.getDataLayout());
6772 
6773  // Get current frame pointer save index. The users of this index will be
6774  // primarily DYNALLOC instructions.
6775  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6776  int FPSI = FI->getFramePointerSaveIndex();
6777 
6778  // If the frame pointer save index hasn't been defined yet.
6779  if (!FPSI) {
6781  // Find out the fixed offset of the frame pointer save area.
6781  int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
6782  // Allocate the frame index for frame pointer save area.
6783  FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
6784  // Save the result.
6785  FI->setFramePointerSaveIndex(FPSI);
6786  }
6787  return DAG.getFrameIndex(FPSI, PtrVT);
6788 }
6789 
6790 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
6791  SelectionDAG &DAG) const {
6792  // Get the inputs.
6793  SDValue Chain = Op.getOperand(0);
6794  SDValue Size = Op.getOperand(1);
6795  SDLoc dl(Op);
6796 
6797  // Get the correct type for pointers.
6798  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6799  // Negate the size.
6800  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
6801  DAG.getConstant(0, dl, PtrVT), Size);
6802  // Construct a node for the frame pointer save index.
6803  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6804  // Build a DYNALLOC node.
6805  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
6806  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
6807  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
6808 }
6809 
6810 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
6811  SelectionDAG &DAG) const {
6812  MachineFunction &MF = DAG.getMachineFunction();
6813 
6814  bool isPPC64 = Subtarget.isPPC64();
6815  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6816 
6817  int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
6818  return DAG.getFrameIndex(FI, PtrVT);
6819 }
6820 
6821 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
6822  SelectionDAG &DAG) const {
6823  SDLoc DL(Op);
6824  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
6825  DAG.getVTList(MVT::i32, MVT::Other),
6826  Op.getOperand(0), Op.getOperand(1));
6827 }
6828 
6829 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
6830  SelectionDAG &DAG) const {
6831  SDLoc DL(Op);
6832  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
6833  Op.getOperand(0), Op.getOperand(1));
6834 }
6835 
6836 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
6837  if (Op.getValueType().isVector())
6838  return LowerVectorLoad(Op, DAG);
6839 
6840  assert(Op.getValueType() == MVT::i1 &&
6841  "Custom lowering only for i1 loads");
6842 
6843  // First, load 8 bits into 32 bits, then truncate to 1 bit.
6844 
6845  SDLoc dl(Op);
6846  LoadSDNode *LD = cast<LoadSDNode>(Op);
6847 
6848  SDValue Chain = LD->getChain();
6849  SDValue BasePtr = LD->getBasePtr();
6850  MachineMemOperand *MMO = LD->getMemOperand();
6851 
6852  SDValue NewLD =
6853  DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
6854  BasePtr, MVT::i8, MMO);
6855  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
6856 
6857  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
6858  return DAG.getMergeValues(Ops, dl);
6859 }
6860 
6861 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
6862  if (Op.getOperand(1).getValueType().isVector())
6863  return LowerVectorStore(Op, DAG);
6864 
6865  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
6866  "Custom lowering only for i1 stores");
6867 
6868  // First, zero extend to 32 bits, then use a truncating store to 8 bits.
6869 
6870  SDLoc dl(Op);
6871  StoreSDNode *ST = cast<StoreSDNode>(Op);
6872 
6873  SDValue Chain = ST->getChain();
6874  SDValue BasePtr = ST->getBasePtr();
6875  SDValue Value = ST->getValue();
6876  MachineMemOperand *MMO = ST->getMemOperand();
6877 
6878  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
6879  Value);
6880  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
6881 }
6882 
6883 // FIXME: Remove this once the ANDI glue bug is fixed:
6884 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
6885  assert(Op.getValueType() == MVT::i1 &&
6886  "Custom lowering only for i1 results");
6887 
6888  SDLoc DL(Op);
6889  return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
6890  Op.getOperand(0));
6891 }
6892 
6893 /// LowerSELECT_CC - Lower floating-point select_cc's into the fsel instruction
6894 /// when possible.
6895 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
6896  // Not FP? Not a fsel.
6897  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
6898  !Op.getOperand(2).getValueType().isFloatingPoint())
6899  return Op;
6900 
6901  // We might be able to do better than this under some circumstances, but in
6902  // general, fsel-based lowering of select is a finite-math-only optimization.
6903  // For more information, see section F.3 of the 2.06 ISA specification.
6904  if (!DAG.getTarget().Options.NoInfsFPMath ||
6905  !DAG.getTarget().Options.NoNaNsFPMath)
6906  return Op;
6907  // TODO: Propagate flags from the select rather than global settings.
6908  SDNodeFlags Flags;
6909  Flags.setNoInfs(true);
6910  Flags.setNoNaNs(true);
6911 
6912  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
6913 
6914  EVT ResVT = Op.getValueType();
6915  EVT CmpVT = Op.getOperand(0).getValueType();
6916  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
6917  SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
6918  SDLoc dl(Op);
6919 
6920  // If the RHS of the comparison is a 0.0, we don't need to do the
6921  // subtraction at all.
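  // (For reference: FSEL(A, TV, FV) selects TV when A >= 0.0 and FV otherwise,
  // which is why SETGE maps directly to a single fsel, while the other
  // predicates below are handled by negating the compared value and/or
  // swapping TV and FV.)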
6922  SDValue Sel1;
6923  if (isFloatingPointZero(RHS))
6924  switch (CC) {
6925  default: break; // SETUO etc aren't handled by fsel.
6926  case ISD::SETNE:
6927  std::swap(TV, FV);
6928  LLVM_FALLTHROUGH;
6929  case ISD::SETEQ:
6930  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
6931  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6932  Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6933  if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
6934  Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
6935  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6936  DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
6937  case ISD::SETULT:
6938  case ISD::SETLT:
6939  std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
6940  LLVM_FALLTHROUGH;
6941  case ISD::SETOGE:
6942  case ISD::SETGE:
6943  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
6944  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6945  return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6946  case ISD::SETUGT:
6947  case ISD::SETGT:
6948  std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
6949  LLVM_FALLTHROUGH;
6950  case ISD::SETOLE:
6951  case ISD::SETLE:
6952  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
6953  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6954  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6955  DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
6956  }
6957 
6958  SDValue Cmp;
6959  switch (CC) {
6960  default: break; // SETUO etc aren't handled by fsel.
6961  case ISD::SETNE:
6962  std::swap(TV, FV);
6963  LLVM_FALLTHROUGH;
6964  case ISD::SETEQ:
6965  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
6966  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6967  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6968  Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6969  if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
6970  Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
6971  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6972  DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
6973  case ISD::SETULT:
6974  case ISD::SETLT:
6975  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
6976  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6977  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6978  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
6979  case ISD::SETOGE:
6980  case ISD::SETGE:
6981  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
6982  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6983  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6984  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6985  case ISD::SETUGT:
6986  case ISD::SETGT:
6987  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
6988  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6989  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6990  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
6991  case ISD::SETOLE:
6992  case ISD::SETLE:
6993  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
6994  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6995  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6996  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6997  }
6998  return Op;
6999 }
7000 
7001 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
7002  SelectionDAG &DAG,
7003  const SDLoc &dl) const {
7004  assert(Op.getOperand(0).getValueType().isFloatingPoint());
7005  SDValue Src = Op.getOperand(0);
7006  if (Src.getValueType() == MVT::f32)
7007  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
7008 
7009  SDValue Tmp;
7010  switch (Op.getSimpleValueType().SimpleTy) {
7011  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7012  case MVT::i32:
7013  Tmp = DAG.getNode(
7014  Op.getOpcode() == ISD::FP_TO_SINT
7015  ? PPCISD::FCTIWZ
7016  : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
7017  dl, MVT::f64, Src);
7018  break;
7019  case MVT::i64:
7020  assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
7021  "i64 FP_TO_UINT is supported only with FPCVT");
7022  Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
7023  PPCISD::FCTIDUZ,
7024  dl, MVT::f64, Src);
7025  break;
7026  }
7027 
7028  // Convert the FP value to an int value through memory.
7029  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
7030  (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
7031  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
7032  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
7033  MachinePointerInfo MPI =
7034  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
7035 
7036  // Emit a store to the stack slot.
7037  SDValue Chain;
7038  if (i32Stack) {
7039  MachineFunction &MF = DAG.getMachineFunction();
7040  MachineMemOperand *MMO =
7041  MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
7042  SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
7043  Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
7044  DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
7045  } else
7046  Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);
7047 
7048  // Result is a load from the stack slot. If loading 4 bytes, make sure to
7049  // add in a bias on big endian.
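  // (The 32-bit result sits in the low-order word of the 8-byte stack slot,
  // which on a big-endian target is at byte offset 4 and on little-endian
  // at offset 0, hence the adjustment below.)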
7050  if (Op.getValueType() == MVT::i32 && !i32Stack) {
7051  FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
7052  DAG.getConstant(4, dl, FIPtr.getValueType()));
7053  MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
7054  }
7055 
7056  RLI.Chain = Chain;
7057  RLI.Ptr = FIPtr;
7058  RLI.MPI = MPI;
7059 }
7060 
7061 /// Custom lowers floating point to integer conversions to use
7062 /// the direct move instructions available in ISA 2.07 to avoid the
7063 /// need for load/store combinations.
7064 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
7065  SelectionDAG &DAG,
7066  const SDLoc &dl) const {
7067  assert(Op.getOperand(0).getValueType().isFloatingPoint());
7068  SDValue Src = Op.getOperand(0);
7069 
7070  if (Src.getValueType() == MVT::f32)
7071  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
7072 
7073  SDValue Tmp;
7074  switch (Op.getSimpleValueType().SimpleTy) {
7075  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7076  case MVT::i32:
7077  Tmp = DAG.getNode(
7078  Op.getOpcode() == ISD::FP_TO_SINT
7079  ? PPCISD::FCTIWZ
7080  : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
7081  dl, MVT::f64, Src);
7082  Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
7083  break;
7084  case MVT::i64:
7085  assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
7086  "i64 FP_TO_UINT is supported only with FPCVT");
7087  Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
7088  PPCISD::FCTIDUZ,
7089  dl, MVT::f64, Src);
7090  Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
7091  break;
7092  }
7093  return Tmp;
7094 }
7095 
7096 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
7097  const SDLoc &dl) const {
7098 
7099  // FP to INT conversions are legal for f128.
7100  if (EnableQuadPrecision && (Op->getOperand(0).getValueType() == MVT::f128))
7101  return Op;
7102 
7103  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
7104  // PPC (the libcall is not available).
7105  if (Op.getOperand(0).getValueType() == MVT::ppcf128) {
7106  if (Op.getValueType() == MVT::i32) {
7107  if (Op.getOpcode() == ISD::FP_TO_SINT) {
7108  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
7109  MVT::f64, Op.getOperand(0),
7110  DAG.getIntPtrConstant(0, dl));
7111  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
7112  MVT::f64, Op.getOperand(0),
7113  DAG.getIntPtrConstant(1, dl));
7114 
7115  // Add the two halves of the long double in round-to-zero mode.
7116  SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
7117 
7118  // Now use a smaller FP_TO_SINT.
7119  return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
7120  }
7121  if (Op.getOpcode() == ISD::FP_TO_UINT) {
7122  const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
7123  APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
7124  SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
7125  // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
7126  // FIXME: generated code sucks.
7127  // TODO: Are there fast-math-flags to propagate to this FSUB?
7128  SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128,
7129  Op.getOperand(0), Tmp);
7130  True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
7131  True = DAG.getNode(ISD::ADD, dl, MVT::i32, True,
7132  DAG.getConstant(0x80000000, dl, MVT::i32));
7133  SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
7134  Op.getOperand(0));
7135  return DAG.getSelectCC(dl, Op.getOperand(0), Tmp, True, False,
7136  ISD::SETGE);
7137  }
7138  }
7139 
7140  return SDValue();
7141  }
7142 
7143  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
7144  return LowerFP_TO_INTDirectMove(Op, DAG, dl);
7145 
7146  ReuseLoadInfo RLI;
7147  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7148 
7149  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7150  RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7151 }
7152 
7153 // We're trying to insert a regular store, S, and then a load, L. If the
7154 // incoming value, O, is a load, we might just be able to have our load use the
7155 // address used by O. However, we don't know if anything else will store to
7156 // that address before we can load from it. To prevent this situation, we need
7157 // to insert our load, L, into the chain as a peer of O. To do this, we give L
7158 // the same chain operand as O, we create a token factor from the chain results
7159 // of O and L, and we replace all uses of O's chain result with that token
7160 // factor (see spliceIntoChain below for this last part).
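// As a rough sketch of the chains involved: before splicing, stores that
// follow O depend on O's chain result; after giving L the same chain operand
// as O and splicing, those users depend on a TokenFactor of the chain results
// of O and L instead, so nothing that previously followed O can be reordered
// in between O and our new load L.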
7161 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
7162  ReuseLoadInfo &RLI,
7163  SelectionDAG &DAG,
7164  ISD::LoadExtType ET) const {
7165  SDLoc dl(Op);
7166  if (ET == ISD::NON_EXTLOAD &&
7167  (Op.getOpcode() == ISD::FP_TO_UINT ||
7168  Op.getOpcode() == ISD::FP_TO_SINT) &&
7169  isOperationLegalOrCustom(Op.getOpcode(),
7170  Op.getOperand(0).getValueType())) {
7171 
7172  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7173  return true;
7174  }
7175 
7176  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
7177  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
7178  LD->isNonTemporal())
7179  return false;
7180  if (LD->getMemoryVT() != MemVT)
7181  return false;
7182 
7183  RLI.Ptr = LD->getBasePtr();
7184  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
7186  "Non-pre-inc AM on PPC?");
7187  RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
7188  LD->getOffset());
7189  }
7190 
7191  RLI.Chain = LD->getChain();
7192  RLI.MPI = LD->getPointerInfo();
7193  RLI.IsDereferenceable = LD->isDereferenceable();
7194  RLI.IsInvariant = LD->isInvariant();
7195  RLI.Alignment = LD->getAlignment();
7196  RLI.AAInfo = LD->getAAInfo();
7197  RLI.Ranges = LD->getRanges();
7198 
7199  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
7200  return true;
7201 }
7202 
7203 // Given the head of the old chain, ResChain, insert a token factor containing
7204 // it and NewResChain, and make users of ResChain now be users of that token
7205 // factor.
7206 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
7207 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
7208  SDValue NewResChain,
7209  SelectionDAG &DAG) const {
7210  if (!ResChain)
7211  return;
7212 
7213  SDLoc dl(NewResChain);
7214 
7215  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
7216  NewResChain, DAG.getUNDEF(MVT::Other));
7217  assert(TF.getNode() != NewResChain.getNode() &&
7218  "A new TF really is required here");
7219 
7220  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
7221  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
7222 }
7223 
7224 /// Analyze the profitability of a direct move:
7225 /// prefer a float load over an int load plus a direct move
7226 /// when the loaded integer value has no integer uses.
7227 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
7228  SDNode *Origin = Op.getOperand(0).getNode();
7229  if (Origin->getOpcode() != ISD::LOAD)
7230  return true;
7231 
7232  // If there is no LXSIBZX/LXSIHZX, like Power8,
7233  // prefer direct move if the memory size is 1 or 2 bytes.
7234  MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
7235  if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
7236  return true;
7237 
7238  for (SDNode::use_iterator UI = Origin->use_begin(),
7239  UE = Origin->use_end();
7240  UI != UE; ++UI) {
7241 
7242  // Only look at the users of the loaded value.
7243  if (UI.getUse().get().getResNo() != 0)
7244  continue;
7245 
7246  if (UI->getOpcode() != ISD::SINT_TO_FP &&
7247  UI->getOpcode() != ISD::UINT_TO_FP)
7248  return true;
7249  }
7250 
7251  return false;
7252 }
7253 
7254 /// Custom lowers integer to floating point conversions to use
7255 /// the direct move instructions available in ISA 2.07 to avoid the
7256 /// need for load/store combinations.
7257 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
7258  SelectionDAG &DAG,
7259  const SDLoc &dl) const {
7260  assert((Op.getValueType() == MVT::f32 ||
7261  Op.getValueType() == MVT::f64) &&
7262  "Invalid floating point type as target of conversion");
7263  assert(Subtarget.hasFPCVT() &&
7264  "Int to FP conversions with direct moves require FPCVT");
7265  SDValue FP;
7266  SDValue Src = Op.getOperand(0);
7267  bool SinglePrec = Op.getValueType() == MVT::f32;
7268  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
7269  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
7270  unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
7271  (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
7272 
7273  if (WordInt) {
7274  FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
7275  dl, MVT::f64, Src);
7276  FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
7277  }
7278  else {
7279  FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
7280  FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
7281  }
7282 
7283  return FP;
7284 }
7285 
7286 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
7287 
7288  EVT VecVT = Vec.getValueType();
7289  assert(VecVT.isVector() && "Expected a vector type.");
7290  assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
7291 
7292  EVT EltVT = VecVT.getVectorElementType();
7293  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7294  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7295 
7296  unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
7297  SmallVector<SDValue, 16> Ops(NumConcat);
7298  Ops[0] = Vec;
7299  SDValue UndefVec = DAG.getUNDEF(VecVT);
7300  for (unsigned i = 1; i < NumConcat; ++i)
7301  Ops[i] = UndefVec;
7302 
7303  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
7304 }
7305 
7306 SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
7307  const SDLoc &dl) const {
7308 
7309  unsigned Opc = Op.getOpcode();
7310  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
7311  "Unexpected conversion type");
7312  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
7313  "Supports conversions to v2f64/v4f32 only.");
7314 
7315  bool SignedConv = Opc == ISD::SINT_TO_FP;
7316  bool FourEltRes = Op.getValueType() == MVT::v4f32;
7317 
7318  SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
7319  EVT WideVT = Wide.getValueType();
7320  unsigned WideNumElts = WideVT.getVectorNumElements();
7321  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
7322 
7323  SmallVector<int, 16> ShuffV;
7324  for (unsigned i = 0; i < WideNumElts; ++i)
7325  ShuffV.push_back(i + WideNumElts);
7326 
7327  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
7328  int SaveElts = FourEltRes ? 4 : 2;
7329  if (Subtarget.isLittleEndian())
7330  for (int i = 0; i < SaveElts; i++)
7331  ShuffV[i * Stride] = i;
7332  else
7333  for (int i = 1; i <= SaveElts; i++)
7334  ShuffV[i * Stride - 1] = i - 1;
7335 
7336  SDValue ShuffleSrc2 =
7337  SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
7338  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
7339  unsigned ExtendOp =
7340  SignedConv ? (unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST;
7341 
7342  SDValue Extend;
7343  if (!Subtarget.hasP9Altivec() && SignedConv) {
7344  Arrange = DAG.getBitcast(IntermediateVT, Arrange);
7345  Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
7346  DAG.getValueType(Op.getOperand(0).getValueType()));
7347  } else
7348  Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange);
7349 
7350  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
7351 }
7352 
7353 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
7354  SelectionDAG &DAG) const {
7355  SDLoc dl(Op);
7356 
7357  EVT InVT = Op.getOperand(0).getValueType();
7358  EVT OutVT = Op.getValueType();
7359  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
7360  isOperationCustom(Op.getOpcode(), InVT))
7361  return LowerINT_TO_FPVector(Op, DAG, dl);
7362 
7363  // Conversions to f128 are legal.
7364  if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
7365  return Op;
7366 
7367  if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
7368  if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
7369  return SDValue();
7370 
7371  SDValue Value = Op.getOperand(0);
7372  // The values are now known to be -1 (false) or 1 (true). To convert this
7373  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
7374  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
7375  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
7376 
7377  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
7378 
7379  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
7380 
7381  if (Op.getValueType() != MVT::v4f64)
7382  Value = DAG.getNode(ISD::FP_ROUND, dl,
7383  Op.getValueType(), Value,
7384  DAG.getIntPtrConstant(1, dl));
7385  return Value;
7386  }
7387 
7388  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
7389  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
7390  return SDValue();
7391 
7392  if (Op.getOperand(0).getValueType() == MVT::i1)
7393  return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
7394  DAG.getConstantFP(1.0, dl, Op.getValueType()),
7395  DAG.getConstantFP(0.0, dl, Op.getValueType()));
7396 
7397  // If we have direct moves, we can do all the conversion, skip the store/load
7398  // however, without FPCVT we can't do most conversions.
7399  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
7400  Subtarget.isPPC64() && Subtarget.hasFPCVT())
7401  return LowerINT_TO_FPDirectMove(Op, DAG, dl);
7402 
7403  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
7404  "UINT_TO_FP is supported only with FPCVT");
7405 
7406  // If we have FCFIDS, then use it when converting to single-precision.
7407  // Otherwise, convert to double-precision and then round.
7408  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7409  ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
7410  : PPCISD::FCFIDS)
7411  : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
7412  : PPCISD::FCFID);
7413  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7414  ? MVT::f32
7415  : MVT::f64;
7416 
7417  if (Op.getOperand(0).getValueType() == MVT::i64) {
7418  SDValue SINT = Op.getOperand(0);
7419  // When converting to single-precision, we actually need to convert
7420  // to double-precision first and then round to single-precision.
7421  // To avoid double-rounding effects during that operation, we have
7422  // to prepare the input operand. Bits that might be truncated when
7423  // converting to double-precision are replaced by a bit that won't
7424  // be lost at this stage, but is below the single-precision rounding
7425  // position.
7426  //
7427  // However, if -enable-unsafe-fp-math is in effect, accept double
7428  // rounding to avoid the extra overhead.
7429  if (Op.getValueType() == MVT::f32 &&
7430  !Subtarget.hasFPCVT() &&
7431  !DAG.getTarget().Options.UnsafeFPMath) {
7432 
7433  // Twiddle input to make sure the low 11 bits are zero. (If this
7434  // is the case, we are guaranteed the value will fit into the 53 bit
7435  // mantissa of an IEEE double-precision value without rounding.)
7436  // If any of those low 11 bits were not zero originally, make sure
7437  // bit 12 (value 2048) is set instead, so that the final rounding
7438  // to single-precision gets the correct result.
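  // Worked example: for SINT = 0x1005 the low 11 bits are nonzero, so the
  // steps below compute (0x1005 & 0x7FF) + 0x7FF = 0x804, then
  // 0x804 | 0x1005 = 0x1805, then 0x1805 & ~0x7FF = 0x1800: the low 11 bits
  // are cleared and bit 11 (value 2048) is set. For SINT = 0x1000, whose low
  // 11 bits are already zero, the value is left unchanged.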
7439  SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7440  SINT, DAG.getConstant(2047, dl, MVT::i64));
7441  Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
7442  Round, DAG.getConstant(2047, dl, MVT::i64));
7443  Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
7444  Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7445  Round, DAG.getConstant(-2048, dl, MVT::i64));
7446 
7447  // However, we cannot use that value unconditionally: if the magnitude
7448  // of the input value is small, the bit-twiddling we did above might
7449  // end up visibly changing the output. Fortunately, in that case, we
7450  // don't need to twiddle bits since the original input will convert
7451  // exactly to double-precision floating-point already. Therefore,
7452  // construct a conditional to use the original value if the top 11
7453  // bits are all sign-bit copies, and use the rounded value computed
7454  // above otherwise.
7455  SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
7456  SINT, DAG.getConstant(53, dl, MVT::i32));
7457  Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
7458  Cond, DAG.getConstant(1, dl, MVT::i64));
7459  Cond = DAG.getSetCC(dl, MVT::i32,
7460  Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
7461 
7462  SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
7463  }
7464 
7465  ReuseLoadInfo RLI;
7466  SDValue Bits;
7467 
7468  MachineFunction &MF = DAG.getMachineFunction();
7469  if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
7470  Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7471  RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7472  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7473  } else if (Subtarget.hasLFIWAX() &&
7474  canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
7475  MachineMemOperand *MMO =
7476  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7477  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7478  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7479  Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
7480  DAG.getVTList(MVT::f64, MVT::Other),
7481  Ops, MVT::i32, MMO);
7482  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7483  } else if (Subtarget.hasFPCVT() &&
7484  canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
7485  MachineMemOperand *MMO =
7486  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7487  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7488  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7489  Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
7490  DAG.getVTList(MVT::f64, MVT::Other),
7491  Ops, MVT::i32, MMO);
7492  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7493  } else if (((Subtarget.hasLFIWAX() &&
7494  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
7495  (Subtarget.hasFPCVT() &&
7496  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
7497  SINT.getOperand(0).getValueType() == MVT::i32) {
7498  MachineFrameInfo &MFI = MF.getFrameInfo();
7499  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7500 
7501  int FrameIdx = MFI.CreateStackObject(4, 4, false);
7502  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7503 
7504  SDValue Store =
7505  DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
7506  MachinePointerInfo::getFixedStack(
7507  DAG.getMachineFunction(), FrameIdx));
7508 
7509  assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7510  "Expected an i32 store");
7511 
7512  RLI.Ptr = FIdx;
7513  RLI.Chain = Store;
7514  RLI.MPI =
7515  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7516  RLI.Alignment = 4;
7517 
7518  MachineMemOperand *MMO =
7519  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7520  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7521  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7522  Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
7523  PPCISD::LFIWZX : PPCISD::LFIWAX,
7524  dl, DAG.getVTList(MVT::f64, MVT::Other),
7525  Ops, MVT::i32, MMO);
7526  } else
7527  Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
7528 
7529  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
7530 
7531  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7532  FP = DAG.getNode(ISD::FP_ROUND, dl,
7533  MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
7534  return FP;
7535  }
7536 
7537  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
7538  "Unhandled INT_TO_FP type in custom expander!");
7539  // Since we only generate this in 64-bit mode, we can take advantage of
7540  // 64-bit registers. In particular, sign extend the input value into the
7541  // 64-bit register with extsw, store the WHOLE 64-bit value onto the stack,
7542  // then lfd it and fcfid it.
7543  MachineFunction &MF = DAG.getMachineFunction();
7544  MachineFrameInfo &MFI = MF.getFrameInfo();
7545  EVT PtrVT = getPointerTy(MF.getDataLayout());
7546 
7547  SDValue Ld;
7548  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
7549  ReuseLoadInfo RLI;
7550  bool ReusingLoad;
7551  if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
7552  DAG))) {
7553  int FrameIdx = MFI.CreateStackObject(4, 4, false);
7554  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7555 
7556  SDValue Store =
7557  DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
7558  MachinePointerInfo::getFixedStack(
7559  DAG.getMachineFunction(), FrameIdx));
7560 
7561  assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7562  "Expected an i32 store");
7563 
7564  RLI.Ptr = FIdx;
7565  RLI.Chain = Store;
7566  RLI.MPI =
7567  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7568  RLI.Alignment = 4;
7569  }
7570 
7571  MachineMemOperand *MMO =
7572  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7573  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7574  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7575  Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
7576  PPCISD::LFIWZX : PPCISD::LFIWAX,
7577  dl, DAG.getVTList(MVT::f64, MVT::Other),
7578  Ops, MVT::i32, MMO);
7579  if (ReusingLoad)
7580  spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
7581  } else {
7582  assert(Subtarget.isPPC64() &&
7583  "i32->FP without LFIWAX supported only on PPC64");
7584 
7585  int FrameIdx = MFI.CreateStackObject(8, 8, false);
7586  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7587 
7588  SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
7589  Op.getOperand(0));
7590 
7591  // STD the extended value into the stack slot.
7592  SDValue Store = DAG.getStore(
7593  DAG.getEntryNode(), dl, Ext64, FIdx,
7594  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7595 
7596  // Load the value as a double.
7597  Ld = DAG.getLoad(
7598  MVT::f64, dl, Store, FIdx,
7599  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7600  }
7601 
7602  // FCFID it and return it.
7603  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
7604  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7605  FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
7606  DAG.getIntPtrConstant(0, dl));
7607  return FP;
7608 }
7609 
7610 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
7611  SelectionDAG &DAG) const {
7612  SDLoc dl(Op);
7613  /*
7614  The rounding mode is in bits 30:31 of the FPSCR, and has the following
7615  settings:
7616  00 Round to nearest
7617  01 Round to 0
7618  10 Round to +inf
7619  11 Round to -inf
7620 
7621  FLT_ROUNDS, on the other hand, expects the following:
7622  -1 Undefined
7623  0 Round to 0
7624  1 Round to nearest
7625  2 Round to +inf
7626  3 Round to -inf
7627 
7628  To perform the conversion, we do:
7629  ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
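  For example (FPSCR rounding bits shown in binary):
  00 -> (0 ^ ((~0 & 3) >> 1)) = 0 ^ 1 = 1 (round to nearest)
  01 -> (1 ^ ((~1 & 3) >> 1)) = 1 ^ 1 = 0 (round to 0)
  10 -> (2 ^ ((~2 & 3) >> 1)) = 2 ^ 0 = 2 (round to +inf)
  11 -> (3 ^ ((~3 & 3) >> 1)) = 3 ^ 0 = 3 (round to -inf)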
7630  */
7631 
7632  MachineFunction &MF = DAG.getMachineFunction();
7633  EVT VT = Op.getValueType();
7634  EVT PtrVT = getPointerTy(MF.getDataLayout());
7635 
7636  // Save FP Control Word to register
7637  EVT NodeTys[] = {
7638  MVT::f64, // return register
7639  MVT::Glue // unused in this context
7640  };
7641  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
7642 
7643  // Save FP register to stack slot
7644  int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
7645  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
7646  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
7647  MachinePointerInfo());
7648 
7649  // Load FP Control Word from low 32 bits of stack slot.
7650  SDValue Four = DAG.getConstant(4, dl, PtrVT);
7651  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
7652  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());
7653 
7654  // Transform as necessary
7655  SDValue CWD1 =
7656  DAG.getNode(ISD::AND, dl, MVT::i32,
7657  CWD, DAG.getConstant(3, dl, MVT::i32));
7658  SDValue CWD2 =
7659  DAG.getNode(ISD::SRL, dl, MVT::i32,
7660  DAG.getNode(ISD::AND, dl, MVT::i32,
7661  DAG.getNode(ISD::XOR, dl, MVT::i32,
7662  CWD, DAG.getConstant(3, dl, MVT::i32)),
7663  DAG.getConstant(3, dl, MVT::i32)),
7664  DAG.getConstant(1, dl, MVT::i32));
7665 
7666  SDValue RetVal =
7667  DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
7668 
7669  return DAG.getNode((VT.getSizeInBits() < 16 ?
7670  ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
7671 }
7672 
7673 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7674  EVT VT = Op.getValueType();
7675  unsigned BitWidth = VT.getSizeInBits();
7676  SDLoc dl(Op);
7677  assert(Op.getNumOperands() == 3 &&
7678  VT == Op.getOperand(1).getValueType() &&
7679  "Unexpected SHL!");
7680 
7681  // Expand into a bunch of logical ops. Note that these ops
7682  // depend on the PPC behavior for oversized shift amounts.
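  // Worked example for BitWidth = 32 and an oversized amount Amt = 40:
  // Tmp2 = Hi << 40 is 0, and Tmp3 = Lo >> (32 - 40) is 0 as well (the
  // negative amount wraps to 56), since PPC shifts by 32..63 produce zero.
  // Tmp5 = 40 - 32 = 8, so OutHi = Lo << 8 and OutLo = Lo << 40 = 0, which
  // is the expected result of a 64-bit shift left by 40.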
7683  SDValue Lo = Op.getOperand(0);
7684  SDValue Hi = Op.getOperand(1);
7685  SDValue Amt = Op.getOperand(2);
7686  EVT AmtVT = Amt.getValueType();
7687 
7688  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7689  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7690  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
7691  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
7692  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
7693  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7694  DAG.getConstant(-BitWidth, dl, AmtVT));
7695  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
7696  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7697  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
7698  SDValue OutOps[] = { OutLo, OutHi };
7699  return DAG.getMergeValues(OutOps, dl);
7700 }
7701 
7702 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7703  EVT VT = Op.getValueType();
7704  SDLoc dl(Op);
7705  unsigned BitWidth = VT.getSizeInBits();
7706  assert(Op.getNumOperands() == 3 &&
7707  VT == Op.getOperand(1).getValueType() &&
7708  "Unexpected SRL!");
7709 
7710  // Expand into a bunch of logical ops. Note that these ops
7711  // depend on the PPC behavior for oversized shift amounts.
7712  SDValue Lo = Op.getOperand(0);
7713  SDValue Hi = Op.getOperand(1);
7714  SDValue Amt = Op.getOperand(2);
7715  EVT AmtVT = Amt.getValueType();
7716 
7717  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7718  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7719  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
7720  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
7721  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
7722  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7723  DAG.getConstant(-BitWidth, dl, AmtVT));
7724  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
7725  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7726  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
7727  SDValue OutOps[] = { OutLo, OutHi };
7728  return DAG.getMergeValues(OutOps, dl);
7729 }
7730 
7731 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
7732  SDLoc dl(Op);
7733  EVT VT = Op.getValueType();
7734  unsigned BitWidth = VT.getSizeInBits();
7735  assert(Op.getNumOperands() == 3 &&
7736  VT == Op.getOperand(1).getValueType() &&
7737  "Unexpected SRA!");
7738 
7739  // Expand into a bunch of logical ops, followed by a select_cc.
7740  SDValue Lo = Op.getOperand(0);
7741  SDValue Hi = Op.getOperand(1);
7742  SDValue Amt = Op.getOperand(2);
7743  EVT AmtVT = Amt.getValueType();
7744 
7745  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7746  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7747  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
7748  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
7749  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
7750  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7751  DAG.getConstant(-BitWidth, dl, AmtVT));
7752  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
7753  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
7754  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
7755  Tmp4, Tmp6, ISD::SETLE);
7756  SDValue OutOps[] = { OutLo, OutHi };
7757  return DAG.getMergeValues(OutOps, dl);
7758 }
7759 
7760 //===----------------------------------------------------------------------===//
7761 // Vector related lowering.
7762 //
7763 
7764 /// BuildSplatI - Build a canonical splati of Val with an element size of
7765 /// SplatSize. Cast the result to VT.
7766 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
7767  SelectionDAG &DAG, const SDLoc &dl) {
7768  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
7769 
7770  static const MVT VTys[] = { // canonical VT to use for each size.
7771  MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
7772  };
7773 
7774  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
7775 
7776  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
7777  if (Val == -1)
7778  SplatSize = 1;
7779 
7780  EVT CanonicalVT = VTys[SplatSize-1];
7781 
7782  // Build a canonical splat for this value.
7783  return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
7784 }
7785 
7786 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
7787 /// specified intrinsic ID.
7788 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
7789  const SDLoc &dl, EVT DestVT = MVT::Other) {
7790  if (DestVT == MVT::Other) DestVT = Op.getValueType();
7791  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7792  DAG.getConstant(IID, dl, MVT::i32), Op);
7793 }
7794 
7795 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
7796 /// specified intrinsic ID.
7797 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
7798  SelectionDAG &DAG, const SDLoc &dl,
7799  EVT DestVT = MVT::Other) {
7800  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
7801  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7802  DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
7803 }
7804 
7805 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
7806 /// specified intrinsic ID.
7807 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
7808  SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
7809  EVT DestVT = MVT::Other) {
7810  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
7811  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7812  DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
7813 }
7814 
7815 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
7816 /// amount. The result has the specified value type.
7817 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
7818  SelectionDAG &DAG, const SDLoc &dl) {
7819  // Force LHS/RHS to be the right type.
7820  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
7821  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
7822 
7823  int Ops[16];
7824  for (unsigned i = 0; i != 16; ++i)
7825  Ops[i] = i + Amt;
7826  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
7827  return DAG.getNode(ISD::BITCAST, dl, VT, T);
7828 }
7829 
7830 /// Do we have an efficient pattern in a .td file for this node?
7831 ///
7832 /// \param V - pointer to the BuildVectorSDNode being matched
7833 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
7834 ///
7835 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
7836 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
7837 /// the opposite is true (expansion is beneficial) are:
7838 /// - The node builds a vector out of integers that are not 32 or 64-bits
7839 /// - The node builds a vector out of constants
7840 /// - The node is a "load-and-splat"
7841 /// In all other cases, we will choose to keep the BUILD_VECTOR.
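/// For example, a v4i32 node whose four operands are the same non-constant
/// load is a load-and-splat, so we return false and let it be expanded,
/// while a v2f64 node built from two different non-constant scalars (and no
/// undef operands) is kept as a BUILD_VECTOR and we return true.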
7842 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
7843  bool HasDirectMove,
7844  bool HasP8Vector) {
7845  EVT VecVT = V->getValueType(0);
7846  bool RightType = VecVT == MVT::v2f64 ||
7847  (HasP8Vector && VecVT == MVT::v4f32) ||
7848  (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
7849  if (!RightType)
7850  return false;
7851 
7852  bool IsSplat = true;
7853  bool IsLoad = false;
7854  SDValue Op0 = V->getOperand(0);
7855 
7856  // This function is called in a block that confirms the node is not a constant
7857  // splat. So a constant BUILD_VECTOR here means the vector is built out of
7858  // different constants.
7859  if (V->isConstant())
7860  return false;
7861  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
7862  if (V->getOperand(i).isUndef())
7863  return false;
7864  // We want to expand nodes that represent load-and-splat even if the
7865  // loaded value is a floating point truncation or conversion to int.
7866  if (V->getOperand(i).getOpcode() == ISD::LOAD ||
7867  (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
7868  V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
7869  (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
7870  V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
7871  (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
7872  V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
7873  IsLoad = true;
7874  // If the operands are different or the input is not a load and has more
7875  // uses than just this BV node, then it isn't a splat.
7876  if (V->getOperand(i) != Op0 ||
7877  (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
7878  IsSplat = false;
7879  }
7880  return !(IsSplat && IsLoad);
7881 }
7882 
7883 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
7884 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
7885 
7886  SDLoc dl(Op);
7887  SDValue Op0 = Op->getOperand(0);
7888 
7889  if (!EnableQuadPrecision ||
7890  (Op.getValueType() != MVT::f128 ) ||
7891  (Op0.getOpcode() != ISD::BUILD_PAIR) ||
7892  (Op0.getOperand(0).getValueType() != MVT::i64) ||
7893  (Op0.getOperand(1).getValueType() != MVT::i64))
7894  return SDValue();
7895 
7896  return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
7897  Op0.getOperand(1));
7898 }
7899 
7900 // If this is a case we can't handle, return null and let the default
7901 // expansion code take care of it. If we CAN select this case, and if it
7902 // selects to a single instruction, return Op. Otherwise, if we can codegen
7903 // this case more efficiently than a constant pool load, lower it to the
7904 // sequence of ops that should be used.
7905 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
7906  SelectionDAG &DAG) const {
7907  SDLoc dl(Op);
7908  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
7909  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
7910 
7911  if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
7912  // We first build an i32 vector, load it into a QPX register,
7913  // then convert it to a floating-point vector and compare it
7914  // to a zero vector to get the boolean result.
7915  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7916  int FrameIdx = MFI.CreateStackObject(16, 16, false);
7917  MachinePointerInfo PtrInfo =
7918  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7919  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7920  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7921 
7922  assert(BVN->getNumOperands() == 4 &&
7923  "BUILD_VECTOR for v4i1 does not have 4 operands");
7924 
7925  bool IsConst = true;
7926  for (unsigned i = 0; i < 4; ++i) {
7927  if (BVN->getOperand(i).isUndef()) continue;
7928  if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
7929  IsConst = false;
7930  break;
7931  }
7932  }
7933 
7934  if (IsConst) {
7935  Constant *One =
7936  ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
7937  Constant *NegOne =
7938  ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
7939 
7940  Constant *CV[4];
7941  for (unsigned i = 0; i < 4; ++i) {
7942  if (BVN->getOperand(i).isUndef())
7943  CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
7944  else if (isNullConstant(BVN->getOperand(i)))
7945  CV[i] = NegOne;
7946  else
7947  CV[i] = One;
7948  }
7949 
7950  Constant *CP = ConstantVector::get(CV);
7951  SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
7952  16 /* alignment */);
7953 
7954  SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
7955  SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
7956  return DAG.getMemIntrinsicNode(
7957  PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
7958  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7959  }
7960 
7961  SmallVector<SDValue, 4> Stores;
7962  for (unsigned i = 0; i < 4; ++i) {
7963  if (BVN->getOperand(i).isUndef()) continue;
7964 
7965  unsigned Offset = 4*i;
7966  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
7967  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
7968 
7969  unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
7970  if (StoreSize > 4) {
7971  Stores.push_back(
7972  DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
7973  PtrInfo.getWithOffset(Offset), MVT::i32));
7974  } else {
7975  SDValue StoreValue = BVN->getOperand(i);
7976  if (StoreSize < 4)
7977  StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
7978 
7979  Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
7980  PtrInfo.getWithOffset(Offset)));
7981  }
7982  }
7983 
7984  SDValue StoreChain;
7985  if (!Stores.empty())
7986  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
7987  else
7988  StoreChain = DAG.getEntryNode();
7989 
7990  // Now load from v4i32 into the QPX register; this will extend it to
7991  // v4i64 but not yet convert it to a floating point. Nevertheless, this
7992  // is typed as v4f64 because the QPX register integer states are not
7993  // explicitly represented.
7994 
7995  SDValue Ops[] = {StoreChain,
7996  DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
7997  FIdx};
7998  SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});
7999 
8000  SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
8001  dl, VTs, Ops, MVT::v4i32, PtrInfo);
8002  LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
8003  DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
8004  LoadedVect);
8005 
8006  SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);
8007 
8008  return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
8009  }
8010 
8011  // All other QPX vectors are handled by generic code.
8012  if (Subtarget.hasQPX())
8013  return SDValue();
8014 
8015  // Check if this is a splat of a constant value.
8016  APInt APSplatBits, APSplatUndef;
8017  unsigned SplatBitSize;
8018  bool HasAnyUndefs;
8019  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
8020  HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
8021  SplatBitSize > 32) {
8022  // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
8023  // lowered to VSX instructions under certain conditions.
8024  // Without VSX, there is no pattern more efficient than expanding the node.
8025  if (Subtarget.hasVSX() &&
8026  haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
8027  Subtarget.hasP8Vector()))
8028  return Op;
8029  return SDValue();
8030  }
8031 
8032  unsigned SplatBits = APSplatBits.getZExtValue();
8033  unsigned SplatUndef = APSplatUndef.getZExtValue();
8034  unsigned SplatSize = SplatBitSize / 8;
8035 
8036  // First, handle single instruction cases.
8037 
8038  // All zeros?
8039  if (SplatBits == 0) {
8040  // Canonicalize all zero vectors to be v4i32.
8041  if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
8042  SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
8043  Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
8044  }
8045  return Op;
8046  }
8047 
8048  // We have XXSPLTIB for constant splats one byte wide
8049  if (Subtarget.hasP9Vector() && SplatSize == 1) {
8050  // This is a splat of 1-byte elements with some elements potentially undef.
8051  // Rather than trying to match undef in the SDAG patterns, ensure that all
8052  // elements are the same constant.
8053  if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
8054  SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
8055  dl, MVT::i32));
8056  SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
8057  if (Op.getValueType() != MVT::v16i8)
8058  return DAG.getBitcast(Op.getValueType(), NewBV);
8059  return NewBV;
8060  }
8061 
8062  // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll
8063  // detect that constant splats like v8i16: 0xABAB are really just splats
8064  // of a 1-byte constant. In this case, we need to convert the node to a
8065  // splat of v16i8 and a bitcast.
8066  if (Op.getValueType() != MVT::v16i8)
8067  return DAG.getBitcast(Op.getValueType(),
8068  DAG.getConstant(SplatBits, dl, MVT::v16i8));
8069 
8070  return Op;
8071  }
8072 
8073  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
8074  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
8075  (32-SplatBitSize));
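  // e.g. SplatBits = 0xFFF0 with SplatBitSize = 16 sign-extends to SextVal = -16.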
8076  if (SextVal >= -16 && SextVal <= 15)
8077  return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
8078 
8079  // Two instruction sequences.
8080 
8081  // If this value is in the range [-32,30] and is even, use:
8082  // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
8083  // If this value is in the range [17,31] and is odd, use:
8084  // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
8085  // If this value is in the range [-31,-17] and is odd, use:
8086  // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
8087  // Note the last two are three-instruction sequences.
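  // e.g. SextVal == 30 (even): vspltisw(15) + vspltisw(15);
  //      SextVal == 27 (odd, in [17,31]): vspltisw(11) - vspltisw(-16) = 11 - (-16) = 27.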
8088  if (SextVal >= -32 && SextVal <= 31) {
8089  // To avoid having these optimizations undone by constant folding,
8090  // we convert to a pseudo that will be expanded later into one of
8091  // the above forms.
8092  SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
8093  EVT VT = (SplatSize == 1 ? MVT::v16i8 :
8094  (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
8095  SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
8096  SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
8097  if (VT == Op.getValueType())
8098  return RetVal;
8099  else
8100  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
8101  }
8102 
8103  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
8104  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
8105  // for fneg/fabs.
8106  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
8107  // Make -1 and vspltisw -1:
8108  SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
8109 
8110  // Make the VSLW intrinsic, computing 0x8000_0000.
8111  SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
8112  OnesV, DAG, dl);
8113 
8114  // xor by OnesV to invert it.
8115  Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
8116  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8117  }
8118 
8119  // Check to see if this is a wide variety of vsplti*, binop self cases.
8120  static const signed char SplatCsts[] = {
8121  -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
8122  -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
8123  };
8124 
8125  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
8126  // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
8127  // cases which are ambiguous (e.g. formation of 0x8000_0000), where 'vsplti -1' is preferred.
8128  int i = SplatCsts[idx];
8129 
8130  // Figure out what shift amount will be used by altivec if shifted by i in
8131  // this splat size.
8132  unsigned TypeShiftAmt = i & (SplatBitSize-1);
8133 
8134  // vsplti + shl self.
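  // e.g. a v16i8 splat of 0x80 (SextVal = -128): i = -2 gives TypeShiftAmt = 6,
  // and vspltisb(-2) followed by vslb (each byte shifted by its own low 3 bits)
  // turns 0xFE into 0x80.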
8135  if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
8136  SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8137  static const unsigned IIDs[] = { // Intrinsic to use for each size.
8138  Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
8139  Intrinsic::ppc_altivec_vslw
8140  };
8141  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8142  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8143  }
8144 
8145  // vsplti + srl self.
8146  if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
8147  SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8148  static const unsigned IIDs[] = { // Intrinsic to use for each size.
8149  Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
8150  Intrinsic::ppc_altivec_vsrw
8151  };
8152  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8153  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8154  }
8155 
8156  // vsplti + sra self.
8157  if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
8158  SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8159  static const unsigned IIDs[] = { // Intrinsic to use for each size.
8160  Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
8161  Intrinsic::ppc_altivec_vsraw
8162  };
8163  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8164  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8165  }
8166 
8167  // vsplti + rol self.
8168  if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
8169  ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
8170  SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8171  static const unsigned IIDs[] = { // Intrinsic to use for each size.
8172  Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
8173  Intrinsic::ppc_altivec_vrlw
8174  };
8175  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8176  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8177  }
8178 
8179  // t = vsplti c, result = vsldoi t, t, 1
8180  if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
8181  SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8182  unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
8183  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8184  }
8185  // t = vsplti c, result = vsldoi t, t, 2
8186  if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
8187  SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8188  unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
8189  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8190  }
8191  // t = vsplti c, result = vsldoi t, t, 3
8192  if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
8193  SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8194  unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
8195  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8196  }
8197  }
8198 
8199  return SDValue();
8200 }
8201 
8202 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8203 /// the specified operations to build the shuffle.
8204 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8205  SDValue RHS, SelectionDAG &DAG,
8206  const SDLoc &dl) {
8207  unsigned OpNum = (PFEntry >> 26) & 0x0F;
8208  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8209  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
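  // Each PerfectShuffleTable entry packs: bits [31:30] the cost, [29:26] the
  // operation, [25:13] the LHS sub-shuffle ID and [12:0] the RHS sub-shuffle ID,
  // where an ID is a base-9 encoding of the four selected words (0-7, 8 = undef).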
8210 
8211  enum {
8212  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8213  OP_VMRGHW,
8214  OP_VMRGLW,
8215  OP_VSPLTISW0,
8216  OP_VSPLTISW1,
8217  OP_VSPLTISW2,
8218  OP_VSPLTISW3,
8219  OP_VSLDOI4,
8220  OP_VSLDOI8,
8221  OP_VSLDOI12
8222  };
8223 
8224  if (OpNum == OP_COPY) {
8225  if (LHSID == (1*9+2)*9+3) return LHS;
8226  assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8227  return RHS;
8228  }
8229 
8230  SDValue OpLHS, OpRHS;
8231  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8232  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8233 
8234  int ShufIdxs[16];
8235  switch (OpNum) {
8236  default: llvm_unreachable("Unknown i32 permute!");
8237  case OP_VMRGHW:
8238  ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
8239  ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
8240  ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
8241  ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
8242  break;
8243  case OP_VMRGLW:
8244  ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
8245  ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
8246  ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
8247  ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
8248  break;
8249  case OP_VSPLTISW0:
8250  for (unsigned i = 0; i != 16; ++i)
8251  ShufIdxs[i] = (i&3)+0;
8252  break;
8253  case OP_VSPLTISW1:
8254  for (unsigned i = 0; i != 16; ++i)
8255  ShufIdxs[i] = (i&3)+4;
8256  break;
8257  case OP_VSPLTISW2:
8258  for (unsigned i = 0; i != 16; ++i)
8259  ShufIdxs[i] = (i&3)+8;
8260  break;
8261  case OP_VSPLTISW3:
8262  for (unsigned i = 0; i != 16; ++i)
8263  ShufIdxs[i] = (i&3)+12;
8264  break;
8265  case OP_VSLDOI4:
8266  return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
8267  case OP_VSLDOI8:
8268  return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
8269  case OP_VSLDOI12:
8270  return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
8271  }
8272  EVT VT = OpLHS.getValueType();
8273  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
8274  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
8275  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
8276  return DAG.getNode(ISD::BITCAST, dl, VT, T);
8277 }
8278 
8279 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
8280 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default
8281 /// SDValue.
8282 SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
8283  SelectionDAG &DAG) const {
8284  const unsigned BytesInVector = 16;
8285  bool IsLE = Subtarget.isLittleEndian();
8286  SDLoc dl(N);
8287  SDValue V1 = N->getOperand(0);
8288  SDValue V2 = N->getOperand(1);
8289  unsigned ShiftElts = 0, InsertAtByte = 0;
8290  bool Swap = false;
8291 
8292  // Shifts required to get the byte we want at element 7.
8293  unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
8294  0, 15, 14, 13, 12, 11, 10, 9};
8295  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
8296  1, 2, 3, 4, 5, 6, 7, 8};
8297 
8298  ArrayRef<int> Mask = N->getMask();
8299  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
8300 
8301  // For each mask element, find out if we're just inserting something
8302  // from V2 into V1 or vice versa.
8303  // Possible permutations inserting an element from V2 into V1:
8304  // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8305  // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8306  // ...
8307  // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
8308  // Inserting from V1 into V2 will be similar, except mask range will be
8309  // [16,31].
8310 
8311  bool FoundCandidate = false;
8312  // If both vector operands for the shuffle are the same vector, the mask
8313  // will contain only elements from the first one and the second one will be
8314  // undef.
8315  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
8316  // Go through the mask of bytes to find an element that's being moved
8317  // from one vector to the other.
8318  for (unsigned i = 0; i < BytesInVector; ++i) {
8319  unsigned CurrentElement = Mask[i];
8320  // If 2nd operand is undefined, we should only look for element 7 in the
8321  // Mask.
8322  if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
8323  continue;
8324 
8325  bool OtherElementsInOrder = true;
8326  // Examine the other elements in the Mask to see if they're in original
8327  // order.
8328  for (unsigned j = 0; j < BytesInVector; ++j) {
8329  if (j == i)
8330  continue;
8331  // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
8332  // to be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
8333  // in which case we assume we're always picking from the 1st operand.
8334  int MaskOffset =
8335  (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
8336  if (Mask[j] != OriginalOrder[j] + MaskOffset) {
8337  OtherElementsInOrder = false;
8338  break;
8339  }
8340  }
8341  // If other elements are in original order, we record the number of shifts
8342  // we need to get the element we want into element 7. Also record which byte
8343  // in the vector we should insert into.
8344  if (OtherElementsInOrder) {
8345  // If 2nd operand is undefined, we assume no shifts and no swapping.
8346  if (V2.isUndef()) {
8347  ShiftElts = 0;
8348  Swap = false;
8349  } else {
8350  // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
8351  ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
8352  : BigEndianShifts[CurrentElement & 0xF];
8353  Swap = CurrentElement < BytesInVector;
8354  }
8355  InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
8356  FoundCandidate = true;
8357  break;
8358  }
8359  }
8360 
8361  if (!FoundCandidate)
8362  return SDValue();
8363 
8364  // Candidate found, construct the proper SDAG sequence with VINSERTB,
8365  // optionally with VECSHL if shift is required.
8366  if (Swap)
8367  std::swap(V1, V2);
8368  if (V2.isUndef())
8369  V2 = V1;
8370  if (ShiftElts) {
8371  SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
8372  DAG.getConstant(ShiftElts, dl, MVT::i32));
8373  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
8374  DAG.getConstant(InsertAtByte, dl, MVT::i32));
8375  }
8376  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
8377  DAG.getConstant(InsertAtByte, dl, MVT::i32));
8378 }
8379 
8380 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
8381 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default
8382 /// SDValue.
8383 SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
8384  SelectionDAG &DAG) const {
8385  const unsigned NumHalfWords = 8;
8386  const unsigned BytesInVector = NumHalfWords * 2;
8387  // Check that the shuffle is on half-words.
8388  if (!isNByteElemShuffleMask(N, 2, 1))
8389  return SDValue();
8390 
8391  bool IsLE = Subtarget.isLittleEndian();
8392  SDLoc dl(N);
8393  SDValue V1 = N->getOperand(0);
8394  SDValue V2 = N->getOperand(1);
8395  unsigned ShiftElts = 0, InsertAtByte = 0;
8396  bool Swap = false;
8397 
8398  // Shifts required to get the half-word we want at element 3.
8399  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
8400  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
8401 
8402  uint32_t Mask = 0;
8403  uint32_t OriginalOrderLow = 0x1234567;
8404  uint32_t OriginalOrderHigh = 0x89ABCDEF;
8405  // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
8406  // 32-bit space, only need 4-bit nibbles per element.
8407  for (unsigned i = 0; i < NumHalfWords; ++i) {
8408  unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
8409  Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
8410  }
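  // e.g. the identity half-word order 0..7 packs to 0x01234567, which is why
  // OriginalOrderLow reads 0x1234567 above.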
8411 
8412  // For each mask element, find out if we're just inserting something
8413  // from V2 into V1 or vice versa. Possible permutations inserting an element
8414  // from V2 into V1:
8415  // X, 1, 2, 3, 4, 5, 6, 7
8416  // 0, X, 2, 3, 4, 5, 6, 7
8417  // 0, 1, X, 3, 4, 5, 6, 7
8418  // 0, 1, 2, X, 4, 5, 6, 7
8419  // 0, 1, 2, 3, X, 5, 6, 7
8420  // 0, 1, 2, 3, 4, X, 6, 7
8421  // 0, 1, 2, 3, 4, 5, X, 7
8422  // 0, 1, 2, 3, 4, 5, 6, X
8423  // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
8424 
8425  bool FoundCandidate = false;
8426  // Go through the mask of half-words to find an element that's being moved
8427  // from one vector to the other.
8428  for (unsigned i = 0; i < NumHalfWords; ++i) {
8429  unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
8430  uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
8431  uint32_t MaskOtherElts = ~(0xF << MaskShift);
8432  uint32_t TargetOrder = 0x0;
8433 
8434  // If both vector operands for the shuffle are the same vector, the mask
8435  // will contain only elements from the first one and the second one will be
8436  // undef.
8437  if (V2.isUndef()) {
8438  ShiftElts = 0;
8439  unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
8440  TargetOrder = OriginalOrderLow;
8441  Swap = false;
8442  // Skip if not the correct element or mask of other elements don't equal
8443  // to our expected order.
8444  if (MaskOneElt == VINSERTHSrcElem &&
8445  (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
8446  InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
8447  FoundCandidate = true;
8448  break;
8449  }
8450  } else { // If both operands are defined.
8451  // Target order is [8,15] if the current mask is between [0,7].
8452  TargetOrder =
8453  (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
8454  // Skip if mask of other elements don't equal our expected order.
8455  if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
8456  // We only need the last 3 bits for the number of shifts.
8457  ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
8458  : BigEndianShifts[MaskOneElt & 0x7];
8459  InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
8460  Swap = MaskOneElt < NumHalfWords;
8461  FoundCandidate = true;
8462  break;
8463  }
8464  }
8465  }
8466 
8467  if (!FoundCandidate)
8468  return SDValue();
8469 
8470  // Candidate found, construct the proper SDAG sequence with VINSERTH,
8471  // optionally with VECSHL if shift is required.
8472  if (Swap)
8473  std::swap(V1, V2);
8474  if (V2.isUndef())
8475  V2 = V1;
8476  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
8477  if (ShiftElts) {
8478  // Double ShiftElts because we're left shifting on v16i8 type.
8479  SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
8480  DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
8481  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
8482  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
8483  DAG.getConstant(InsertAtByte, dl, MVT::i32));
8484  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8485  }
8486  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
8487  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
8488  DAG.getConstant(InsertAtByte, dl, MVT::i32));
8489  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8490 }
8491 
8492 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
8493 /// is a shuffle we can handle in a single instruction, return it. Otherwise,
8494 /// return the code it can be lowered into. Worst case, it can always be
8495 /// lowered into a vperm.
8496 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
8497  SelectionDAG &DAG) const {
8498  SDLoc dl(Op);
8499  SDValue V1 = Op.getOperand(0);
8500  SDValue V2 = Op.getOperand(1);
8501  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
8502  EVT VT = Op.getValueType();
8503  bool isLittleEndian = Subtarget.isLittleEndian();
8504 
8505  unsigned ShiftElts, InsertAtByte;
8506  bool Swap = false;
8507  if (Subtarget.hasP9Vector() &&
8508  PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
8509  isLittleEndian)) {
8510  if (Swap)
8511  std::swap(V1, V2);
8512  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8513  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
8514  if (ShiftElts) {
8515  SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
8516  DAG.getConstant(ShiftElts, dl, MVT::i32));
8517  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
8518  DAG.getConstant(InsertAtByte, dl, MVT::i32));
8519  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8520  }
8521  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
8522  DAG.getConstant(InsertAtByte, dl, MVT::i32));
8523  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8524  }
8525 
8526  if (Subtarget.hasP9Altivec()) {
8527  SDValue NewISDNode;
8528  if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
8529  return NewISDNode;
8530 
8531  if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
8532  return NewISDNode;
8533  }
8534 
8535  if (Subtarget.hasVSX() &&
8536  PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
8537  if (Swap)
8538  std::swap(V1, V2);
8539  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8540  SDValue Conv2 =
8541  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
8542 
8543  SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
8544  DAG.getConstant(ShiftElts, dl, MVT::i32));
8545  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
8546  }
8547 
8548  if (Subtarget.hasVSX() &&
8549  PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
8550  if (Swap)
8551  std::swap(V1, V2);
8552  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
8553  SDValue Conv2 =
8554  DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
8555 
8556  SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
8557  DAG.getConstant(ShiftElts, dl, MVT::i32));
8558  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
8559  }
8560 
8561  if (Subtarget.hasP9Vector()) {
8562  if (PPC::isXXBRHShuffleMask(SVOp)) {
8563  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
8564  SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
8565  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
8566  } else if (PPC::isXXBRWShuffleMask(SVOp)) {
8567  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8568  SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
8569  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
8570  } else if (PPC::isXXBRDShuffleMask(SVOp)) {
8571  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
8572  SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
8573  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
8574  } else if (PPC::isXXBRQShuffleMask(SVOp)) {
8575  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
8576  SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
8577  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
8578  }
8579  }
8580 
8581  if (Subtarget.hasVSX()) {
8582  if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
8583  int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
8584 
8585  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8586  SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
8587  DAG.getConstant(SplatIdx, dl, MVT::i32));
8588  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
8589  }
8590 
8591  // Left shifts of 8 bytes are actually swaps. Convert accordingly.
8592  if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
8593  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
8594  SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
8595  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
8596  }
8597  }
8598 
8599  if (Subtarget.hasQPX()) {
8600  if (VT.getVectorNumElements() != 4)
8601  return SDValue();
8602 
8603  if (V2.isUndef()) V2 = V1;
8604 
8605  int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
8606  if (AlignIdx != -1) {
8607  return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
8608  DAG.getConstant(AlignIdx, dl, MVT::i32));
8609  } else if (SVOp->isSplat()) {
8610  int SplatIdx = SVOp->getSplatIndex();
8611  if (SplatIdx >= 4) {
8612  std::swap(V1, V2);
8613  SplatIdx -= 4;
8614  }
8615 
8616  return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
8617  DAG.getConstant(SplatIdx, dl, MVT::i32));
8618  }
8619 
8620  // Lower this into a qvgpci/qvfperm pair.
8621 
8622  // Compute the qvgpci literal
8623  unsigned idx = 0;
8624  for (unsigned i = 0; i < 4; ++i) {
8625  int m = SVOp->getMaskElt(i);
8626  unsigned mm = m >= 0 ? (unsigned) m : i;
8627  idx |= mm << (3-i)*3;
8628  }
8629 
8630  SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
8631  DAG.getConstant(idx, dl, MVT::i32));
8632  return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
8633  }
8634 
8635  // Cases that are handled by instructions that take permute immediates
8636  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
8637  // selected by the instruction selector.
8638  if (V2.isUndef()) {
8639  if (PPC::isSplatShuffleMask(SVOp, 1) ||
8640  PPC::isSplatShuffleMask(SVOp, 2) ||
8641  PPC::isSplatShuffleMask(SVOp, 4) ||
8642  PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
8643  PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
8644  PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
8645  PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
8646  PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
8647  PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
8648  PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
8649  PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
8650  PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
8651  (Subtarget.hasP8Altivec() && (
8652  PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
8653  PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
8654  PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
8655  return Op;
8656  }
8657  }
8658 
8659  // Altivec has a variety of "shuffle immediates" that take two vector inputs
8660  // and produce a fixed permutation. If any of these match, do not lower to
8661  // VPERM.
8662  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
8663  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8664  PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8665  PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
8666  PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
8667  PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
8668  PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
8669  PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
8670  PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
8671  PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
8672  (Subtarget.hasP8Altivec() && (
8673  PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8674  PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
8675  PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
8676  return Op;
8677 
8678  // Check to see if this is a shuffle of 4-byte values. If so, we can use our
8679  // perfect shuffle table to emit an optimal matching sequence.
8680  ArrayRef<int> PermMask = SVOp->getMask();
8681 
8682  unsigned PFIndexes[4];
8683  bool isFourElementShuffle = true;
8684  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
8685  unsigned EltNo = 8; // Start out undef.
8686  for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
8687  if (PermMask[i*4+j] < 0)
8688  continue; // Undef, ignore it.
8689 
8690  unsigned ByteSource = PermMask[i*4+j];
8691  if ((ByteSource & 3) != j) {
8692  isFourElementShuffle = false;
8693  break;
8694  }
8695 
8696  if (EltNo == 8) {
8697  EltNo = ByteSource/4;
8698  } else if (EltNo != ByteSource/4) {
8699  isFourElementShuffle = false;
8700  break;
8701  }
8702  }
8703  PFIndexes[i] = EltNo;
8704  }
8705 
8706  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
8707  // perfect shuffle vector to determine if it is cost effective to do this as
8708  // discrete instructions, or whether we should use a vperm.
8709  // For now, we skip this for little endian until such time as we have a
8710  // little-endian perfect shuffle table.
8711  if (isFourElementShuffle && !isLittleEndian) {
8712  // Compute the index in the perfect shuffle table.
8713  unsigned PFTableIndex =
8714  PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8715 
8716  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8717  unsigned Cost = (PFEntry >> 30);
8718 
8719  // Determining when to avoid vperm is tricky. Many things affect the cost
8720  // of vperm, particularly how many times the perm mask needs to be computed.
8721  // For example, if the perm mask can be hoisted out of a loop or is already
8722  // used (perhaps because there are multiple permutes with the same shuffle
8723  // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
8724  // the loop requires an extra register.
8725  //
8726  // As a compromise, we only emit discrete instructions if the shuffle can be
8727  // generated in 3 or fewer operations. When we have loop information
8728  // available, if this block is within a loop, we should avoid using vperm
8729  // for 3-operation perms and use a constant pool load instead.
8730  if (Cost < 3)
8731  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8732  }
8733 
8734  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
8735  // vector that will get spilled to the constant pool.
8736  if (V2.isUndef()) V2 = V1;
8737 
8738  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
8739  // that it is in input element units, not in bytes. Convert now.
8740 
8741  // For little endian, the order of the input vectors is reversed, and
8742  // the permutation mask is complemented with respect to 31. This is
8743  // necessary to produce proper semantics with the big-endian-biased vperm
8744  // instruction.
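  // e.g. a byte index of 5 becomes 31 - 5 = 26 in the little-endian mask built below.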
8745  EVT EltVT = V1.getValueType().getVectorElementType();
8746  unsigned BytesPerElement = EltVT.getSizeInBits()/8;
8747 
8748  SmallVector<SDValue, 16> ResultMask;
8749  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
8750  unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
8751 
8752  for (unsigned j = 0; j != BytesPerElement; ++j)
8753  if (isLittleEndian)
8754  ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
8755  dl, MVT::i32));
8756  else
8757  ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
8758  MVT::i32));
8759  }
8760 
8761  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
8762  if (isLittleEndian)
8763  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
8764  V2, V1, VPermMask);
8765  else
8766  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
8767  V1, V2, VPermMask);
8768 }
8769 
8770 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
8771 /// vector comparison. If it is, return true and fill in Opc/isDot with
8772 /// information about the intrinsic.
8773 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
8774  bool &isDot, const PPCSubtarget &Subtarget) {
8775  unsigned IntrinsicID =
8776  cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
8777  CompareOpc = -1;
8778  isDot = false;
8779  switch (IntrinsicID) {
8780  default:
8781  return false;
8782  // Comparison predicates.
8783  case Intrinsic::ppc_altivec_vcmpbfp_p:
8784  CompareOpc = 966;
8785  isDot = true;
8786  break;
8787  case Intrinsic::ppc_altivec_vcmpeqfp_p:
8788  CompareOpc = 198;
8789  isDot = true;
8790  break;
8791  case Intrinsic::ppc_altivec_vcmpequb_p:
8792  CompareOpc = 6;
8793  isDot = true;
8794  break;
8795  case Intrinsic::ppc_altivec_vcmpequh_p:
8796  CompareOpc = 70;
8797  isDot = true;
8798  break;
8799  case Intrinsic::ppc_altivec_vcmpequw_p:
8800  CompareOpc = 134;
8801  isDot = true;
8802  break;
8803  case Intrinsic::ppc_altivec_vcmpequd_p:
8804  if (Subtarget.hasP8Altivec()) {
8805  CompareOpc = 199;
8806  isDot = true;
8807  } else
8808  return false;
8809  break;
8810  case Intrinsic::ppc_altivec_vcmpneb_p:
8811  case Intrinsic::ppc_altivec_vcmpneh_p:
8812  case Intrinsic::ppc_altivec_vcmpnew_p:
8813  case Intrinsic::ppc_altivec_vcmpnezb_p:
8814  case Intrinsic::ppc_altivec_vcmpnezh_p:
8815  case Intrinsic::ppc_altivec_vcmpnezw_p:
8816  if (Subtarget.hasP9Altivec()) {
8817  switch (IntrinsicID) {
8818  default:
8819  llvm_unreachable("Unknown comparison intrinsic.");
8820  case Intrinsic::ppc_altivec_vcmpneb_p:
8821  CompareOpc = 7;
8822  break;
8823  case Intrinsic::ppc_altivec_vcmpneh_p:
8824  CompareOpc = 71;
8825  break;
8826  case Intrinsic::ppc_altivec_vcmpnew_p:
8827  CompareOpc = 135;
8828  break;
8829  case Intrinsic::ppc_altivec_vcmpnezb_p:
8830  CompareOpc = 263;
8831  break;
8832  case Intrinsic::ppc_altivec_vcmpnezh_p:
8833  CompareOpc = 327;
8834  break;
8835  case Intrinsic::ppc_altivec_vcmpnezw_p:
8836  CompareOpc = 391;
8837  break;
8838  }
8839  isDot = true;
8840  } else
8841  return false;
8842  break;
8843  case Intrinsic::ppc_altivec_vcmpgefp_p:
8844  CompareOpc = 454;
8845  isDot = true;
8846  break;
8847  case Intrinsic::ppc_altivec_vcmpgtfp_p:
8848  CompareOpc = 710;
8849  isDot = true;
8850  break;
8851  case Intrinsic::ppc_altivec_vcmpgtsb_p:
8852  CompareOpc = 774;
8853  isDot = true;
8854  break;
8855  case Intrinsic::ppc_altivec_vcmpgtsh_p:
8856  CompareOpc = 838;
8857  isDot = true;
8858  break;
8859  case Intrinsic::ppc_altivec_vcmpgtsw_p:
8860  CompareOpc = 902;
8861  isDot = true;
8862  break;
8863  case Intrinsic::ppc_altivec_vcmpgtsd_p:
8864  if (Subtarget.hasP8Altivec()) {
8865  CompareOpc = 967;
8866  isDot = true;
8867  } else
8868  return false;
8869  break;
8870  case Intrinsic::ppc_altivec_vcmpgtub_p:
8871  CompareOpc = 518;
8872  isDot = true;
8873  break;
8874  case Intrinsic::ppc_altivec_vcmpgtuh_p:
8875  CompareOpc = 582;
8876  isDot = true;
8877  break;
8878  case Intrinsic::ppc_altivec_vcmpgtuw_p:
8879  CompareOpc = 646;
8880  isDot = true;
8881  break;
8882  case Intrinsic::ppc_altivec_vcmpgtud_p:
8883  if (Subtarget.hasP8Altivec()) {
8884  CompareOpc = 711;
8885  isDot = true;
8886  } else
8887  return false;
8888  break;
8889 
8890  // VSX predicate comparisons use the same infrastructure
8891  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
8892  case Intrinsic::ppc_vsx_xvcmpgedp_p:
8893  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
8894  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
8895  case Intrinsic::ppc_vsx_xvcmpgesp_p:
8896  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
8897  if (Subtarget.hasVSX()) {
8898  switch (IntrinsicID) {
8899  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
8900  CompareOpc = 99;
8901  break;
8902  case Intrinsic::ppc_vsx_xvcmpgedp_p:
8903  CompareOpc = 115;
8904  break;
8905  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
8906  CompareOpc = 107;
8907  break;
8908  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
8909  CompareOpc = 67;
8910  break;
8911  case Intrinsic::ppc_vsx_xvcmpgesp_p:
8912  CompareOpc = 83;
8913  break;
8914  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
8915  CompareOpc = 75;
8916  break;
8917  }
8918  isDot = true;
8919  } else
8920  return false;
8921  break;
8922 
8923  // Normal Comparisons.
8924  case Intrinsic::ppc_altivec_vcmpbfp:
8925  CompareOpc = 966;
8926  break;
8927  case Intrinsic::ppc_altivec_vcmpeqfp:
8928  CompareOpc = 198;
8929  break;
8930  case Intrinsic::ppc_altivec_vcmpequb:
8931  CompareOpc = 6;
8932  break;
8933  case Intrinsic::ppc_altivec_vcmpequh:
8934  CompareOpc = 70;
8935  break;
8936  case Intrinsic::ppc_altivec_vcmpequw:
8937  CompareOpc = 134;
8938  break;
8939  case Intrinsic::ppc_altivec_vcmpequd:
8940  if (Subtarget.hasP8Altivec())
8941  CompareOpc = 199;
8942  else
8943  return false;
8944  break;
8945  case Intrinsic::ppc_altivec_vcmpneb:
8946  case Intrinsic::ppc_altivec_vcmpneh:
8947  case Intrinsic::ppc_altivec_vcmpnew:
8948  case Intrinsic::ppc_altivec_vcmpnezb:
8949  case Intrinsic::ppc_altivec_vcmpnezh:
8950  case Intrinsic::ppc_altivec_vcmpnezw:
8951  if (Subtarget.hasP9Altivec())
8952  switch (IntrinsicID) {
8953  default:
8954  llvm_unreachable("Unknown comparison intrinsic.");
8955  case Intrinsic::ppc_altivec_vcmpneb:
8956  CompareOpc = 7;
8957  break;
8958  case Intrinsic::ppc_altivec_vcmpneh:
8959  CompareOpc = 71;
8960  break;
8961  case Intrinsic::ppc_altivec_vcmpnew:
8962  CompareOpc = 135;
8963  break;
8964  case Intrinsic::ppc_altivec_vcmpnezb:
8965  CompareOpc = 263;
8966  break;
8967  case Intrinsic::ppc_altivec_vcmpnezh:
8968  CompareOpc = 327;
8969  break;
8970  case Intrinsic::ppc_altivec_vcmpnezw:
8971  CompareOpc = 391;
8972  break;
8973  }
8974  else
8975  return false;
8976  break;
8977  case Intrinsic::ppc_altivec_vcmpgefp:
8978  CompareOpc = 454;
8979  break;
8980  case Intrinsic::ppc_altivec_vcmpgtfp:
8981  CompareOpc = 710;
8982  break;
8983  case Intrinsic::ppc_altivec_vcmpgtsb:
8984  CompareOpc = 774;
8985  break;
8986  case Intrinsic::ppc_altivec_vcmpgtsh:
8987  CompareOpc = 838;
8988  break;
8989  case Intrinsic::ppc_altivec_vcmpgtsw:
8990  CompareOpc = 902;
8991  break;
8992  case Intrinsic::ppc_altivec_vcmpgtsd:
8993  if (Subtarget.hasP8Altivec())
8994  CompareOpc = 967;
8995  else
8996  return false;
8997  break;
8998  case Intrinsic::ppc_altivec_vcmpgtub:
8999  CompareOpc = 518;
9000  break;
9001  case Intrinsic::ppc_altivec_vcmpgtuh:
9002  CompareOpc = 582;
9003  break;
9004  case Intrinsic::ppc_altivec_vcmpgtuw:
9005  CompareOpc = 646;
9006  break;
9007  case Intrinsic::ppc_altivec_vcmpgtud:
9008  if (Subtarget.hasP8Altivec())
9009  CompareOpc = 711;
9010  else
9011  return false;
9012  break;
9013  }
9014  return true;
9015 }
9016 
9017 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
9018 /// lower, do it, otherwise return null.
9019 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9020  SelectionDAG &DAG) const {
9021  unsigned IntrinsicID =
9022  cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
9023 
9024  SDLoc dl(Op);
9025 
9026  if (IntrinsicID == Intrinsic::thread_pointer) {
9027  // Reads the thread pointer register, used for __builtin_thread_pointer.
9028  if (Subtarget.isPPC64())
9029  return DAG.getRegister(PPC::X13, MVT::i64);
9030  return DAG.getRegister(PPC::R2, MVT::i32);
9031  }
9032 
9033  // If this is a lowered altivec predicate compare, CompareOpc is set to the
9034  // opcode number of the comparison.
9035  int CompareOpc;
9036  bool isDot;
9037  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
9038  return SDValue(); // Don't custom lower most intrinsics.
9039 
9040  // If this is a non-dot comparison, make the VCMP node and we are done.
9041  if (!isDot) {
9042  SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
9043  Op.getOperand(1), Op.getOperand(2),
9044  DAG.getConstant(CompareOpc, dl, MVT::i32));
9045  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
9046  }
9047 
9048  // Create the PPCISD altivec 'dot' comparison node.
9049  SDValue Ops[] = {
9050  Op.getOperand(2), // LHS
9051  Op.getOperand(3), // RHS
9052  DAG.getConstant(CompareOpc, dl, MVT::i32)
9053  };
9054  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
9055  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
9056 
9057  // Now that we have the comparison, emit a copy from the CR to a GPR.
9058  // This is flagged to the above dot comparison.
9059  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
9060  DAG.getRegister(PPC::CR6, MVT::i32),
9061  CompNode.getValue(1));
9062 
9063  // Unpack the result based on how the target uses it.
9064  unsigned BitNo; // Bit # of CR6.
9065  bool InvertBit; // Invert result?
9066  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
9067  default: // Can't happen, don't crash on invalid number though.
9068  case 0: // Return the value of the EQ bit of CR6.
9069  BitNo = 0; InvertBit = false;
9070  break;
9071  case 1: // Return the inverted value of the EQ bit of CR6.
9072  BitNo = 0; InvertBit = true;
9073  break;
9074  case 2: // Return the value of the LT bit of CR6.
9075  BitNo = 2; InvertBit = false;
9076  break;
9077  case 3: // Return the inverted value of the LT bit of CR6.
9078  BitNo = 2; InvertBit = true;
9079  break;
9080  }
9081 
9082  // Shift the bit into the low position.
9083  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
9084  DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
9085  // Isolate the bit.
9086  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
9087  DAG.getConstant(1, dl, MVT::i32));
9088 
9089  // If we are supposed to, toggle the bit.
9090  if (InvertBit)
9091  Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
9092  DAG.getConstant(1, dl, MVT::i32));
9093  return Flags;
9094 }
9095 
9096 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9097  SelectionDAG &DAG) const {
9098  // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
9099  // the beginning of the argument list.
9100  int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
9101  SDLoc DL(Op);
9102  switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
9103  case Intrinsic::ppc_cfence: {
9104  assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
9105  assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
9106  return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
9107  DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
9108  Op.getOperand(ArgStart + 1)),
9109  Op.getOperand(0)),
9110  0);
9111  }
9112  default:
9113  break;
9114  }
9115  return SDValue();
9116 }
9117 
9118 SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
9119  // Check for a DIV with the same operands as this REM.
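  // If such a DIV exists, return SDValue() so this REM is expanded in terms of
  // that quotient; otherwise keep the node as-is.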
9120  for (auto UI : Op.getOperand(1)->uses()) {
9121  if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
9122  (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
9123  if (UI->getOperand(0) == Op.getOperand(0) &&
9124  UI->getOperand(1) == Op.getOperand(1))
9125  return SDValue();
9126  }
9127  return Op;
9128 }
9129 
9130 // Lower scalar BSWAP64 to xxbrd.
9131 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
9132  SDLoc dl(Op);
9133  // MTVSRDD
9134  Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
9135  Op.getOperand(0));
9136  // XXBRD
9137  Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op);
9138  // MFVSRD
9139  int VectorIndex = 0;
9140  if (Subtarget.isLittleEndian())
9141  VectorIndex = 1;
9142  Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
9143  DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
9144  return Op;
9145 }
9146 
9147 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
9148 // compared to a value that is atomically loaded (atomic loads zero-extend).
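// e.g. for an i8 cmpxchg, a compare value of 0xFFFFFF80 is masked to 0x80 below
// so it matches the zero-extended loaded byte.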
9149 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
9150  SelectionDAG &DAG) const {
9151  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
9152  "Expecting an atomic compare-and-swap here.");
9153  SDLoc dl(Op);
9154  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
9155  EVT MemVT = AtomicNode->getMemoryVT();
9156  if (MemVT.getSizeInBits() >= 32)
9157  return Op;
9158 
9159  SDValue CmpOp = Op.getOperand(2);
9160  // If this is already correctly zero-extended, leave it alone.
9161  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
9162  if (DAG.MaskedValueIsZero(CmpOp, HighBits))
9163  return Op;
9164 
9165  // Clear the high bits of the compare operand.
9166  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
9167  SDValue NewCmpOp =
9168  DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
9169  DAG.getConstant(MaskVal, dl, MVT::i32));
9170 
9171  // Replace the existing compare operand with the properly zero-extended one.
9172  SmallVector<SDValue, 4> Ops;
9173  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
9174  Ops.push_back(AtomicNode->getOperand(i));
9175  Ops[2] = NewCmpOp;
9176  MachineMemOperand *MMO = AtomicNode->getMemOperand();
9177  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
9178  auto NodeTy =
9179  (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
9180  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
9181 }
9182 
9183 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
9184  SelectionDAG &DAG) const {
9185  SDLoc dl(Op);
9186  // Create a stack slot that is 16-byte aligned.
9187  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9188  int FrameIdx = MFI.CreateStackObject(16, 16, false);
9189  EVT PtrVT = getPointerTy(DAG.getDataLayout());
9190  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9191 
9192  // Store the input value into Value#0 of the stack slot.
9193  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
9194  MachinePointerInfo());
9195  // Load it out.
9196  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
9197 }
9198 
9199 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
9200  SelectionDAG &DAG) const {
9201  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
9202  "Should only be called for ISD::INSERT_VECTOR_ELT");
9203 
9204  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
9205  // We have legal lowering for constant indices but not for variable ones.
9206  if (!C)
9207  return SDValue();
9208 
9209  EVT VT = Op.getValueType();
9210  SDLoc dl(Op);
9211  SDValue V1 = Op.getOperand(0);
9212  SDValue V2 = Op.getOperand(1);
9213  // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
9214  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
9215  SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
9216  unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
9217  unsigned InsertAtElement = C->getZExtValue();
9218  unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
9219  if (Subtarget.isLittleEndian()) {
9220  InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
9221  }
9222  return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
9223  DAG.getConstant(InsertAtByte, dl, MVT::i32));
9224  }
9225  return Op;
9226 }
9227 
9228 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
9229  SelectionDAG &DAG) const {
9230  SDLoc dl(Op);
9231  SDNode *N = Op.getNode();
9232 
9233  assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
9234  "Unknown extract_vector_elt type");
9235 
9236  SDValue Value = N->getOperand(0);
9237 
9238  // The first part of this is like the store lowering except that we don't
9239  // need to track the chain.
9240 
9241  // The values are now known to be -1 (false) or 1 (true). To convert this
9242  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
9243  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
9244  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
9245 
9246  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
9247  // understand how to form the extending load.
9248  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
9249 
9250  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
9251 
9252  // Now convert to an integer and store.
9253  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
9254  DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
9255  Value);
9256 
9257  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9258  int FrameIdx = MFI.CreateStackObject(16, 16, false);
9259  MachinePointerInfo PtrInfo =
9260  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9261  EVT PtrVT = getPointerTy(DAG.getDataLayout());
9262  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9263 
9264  SDValue StoreChain = DAG.getEntryNode();
9265  SDValue Ops[] = {StoreChain,
9266  DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
9267  Value, FIdx};
9268  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
9269 
9270  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
9271  dl, VTs, Ops, MVT::v4i32, PtrInfo);
9272 
9273  // Extract the value requested.
9274  unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
9275  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
9276  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
9277 
9278  SDValue IntVal =
9279  DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));
9280 
9281  if (!Subtarget.useCRBits())
9282  return IntVal;
9283 
9284  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
9285 }
9286 
9287 /// Lowering for QPX v4i1 loads
9288 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
9289  SelectionDAG &DAG) const {
9290  SDLoc dl(Op);
9291  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
9292  SDValue LoadChain = LN->getChain();
9293  SDValue BasePtr = LN->getBasePtr();
9294 
9295  if (Op.getValueType() == MVT::v4f64 ||
9296  Op.getValueType() == MVT::v4f32) {
9297  EVT MemVT = LN->getMemoryVT();
9298  unsigned Alignment = LN->getAlignment();
9299 
9300  // If this load is properly aligned, then it is legal.
9301  if (Alignment >= MemVT.getStoreSize())
9302  return Op;
9303 
9304  EVT ScalarVT = Op.getValueType().getScalarType(),
9305  ScalarMemVT = MemVT.getScalarType();
9306  unsigned Stride = ScalarMemVT.getStoreSize();
9307 
9308  SDValue Vals[4], LoadChains[4];
9309  for (unsigned Idx = 0; Idx < 4; ++Idx) {
9310  SDValue Load;
9311  if (ScalarVT != ScalarMemVT)
9312  Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
9313  BasePtr,
9314  LN->getPointerInfo().getWithOffset(Idx * Stride),
9315  ScalarMemVT, MinAlign(Alignment, Idx * Stride),
9316  LN->getMemOperand()->getFlags(), LN->getAAInfo());
9317  else
9318  Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
9319  LN->getPointerInfo().getWithOffset(Idx * Stride),
9320  MinAlign(Alignment, Idx * Stride),
9321  LN->getMemOperand()->getFlags(), LN->getAAInfo());
9322 
9323  if (Idx == 0 && LN->isIndexed()) {
9324  assert(LN->getAddressingMode() == ISD::PRE_INC &&
9325  "Unknown addressing mode on vector load");
9326  Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
9327  LN->getAddressingMode());
9328  }
9329 
9330  Vals[Idx] = Load;
9331  LoadChains[Idx] = Load.getValue(1);
9332 
9333  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
9334  DAG.getConstant(Stride, dl,
9335  BasePtr.getValueType()));
9336  }
9337 
9338  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
9339  SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);
9340 
9341  if (LN->isIndexed()) {
9342  SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
9343  return DAG.getMergeValues(RetOps, dl);
9344  }
9345 
9346  SDValue RetOps[] = { Value, TF };
9347  return DAG.getMergeValues(RetOps, dl);
9348  }
9349 
9350  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
9351  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
9352 
9353  // To lower v4i1 from a byte array, we load the byte elements of the
9354  // vector and then reuse the BUILD_VECTOR logic.
9355 
9356  SDValue VectElmts[4], VectElmtChains[4];
9357  for (unsigned i = 0; i < 4; ++i) {
9358  SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
9359  Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
9360 
9361  VectElmts[i] = DAG.getExtLoad(
9362  ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
9363  LN->getPointerInfo().getWithOffset(i), MVT::i8,
9364  /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
9365  VectElmtChains[i] = VectElmts[i].getValue(1);
9366  }
9367 
9368  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
9369  SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);
9370 
9371  SDValue RVals[] = { Value, LoadChain };
9372  return DAG.getMergeValues(RVals, dl);
9373 }
9374 
9375 /// Lowering for QPX v4i1 stores
9376 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
9377  SelectionDAG &DAG) const {
9378  SDLoc dl(Op);
9379  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
9380  SDValue StoreChain = SN->getChain();
9381  SDValue BasePtr = SN->getBasePtr();
9382  SDValue Value = SN->getValue();
9383 
9384  if (Value.getValueType() == MVT::v4f64 ||
9385  Value.getValueType() == MVT::v4f32) {
9386  EVT MemVT = SN->getMemoryVT();
9387  unsigned Alignment = SN->getAlignment();
9388 
9389  // If this store is properly aligned, then it is legal.
9390  if (Alignment >= MemVT.getStoreSize())
9391  return Op;
9392 
9393  EVT ScalarVT = Value.getValueType().getScalarType(),
9394  ScalarMemVT = MemVT.getScalarType();
9395  unsigned Stride = ScalarMemVT.getStoreSize();
9396 
9397  SDValue Stores[4];
9398  for (unsigned Idx = 0; Idx < 4; ++Idx) {
9399  SDValue Ex = DAG.getNode(
9400  ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
9401  DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
9402  SDValue Store;
9403  if (ScalarVT != ScalarMemVT)
9404  Store =
9405  DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
9406  SN->getPointerInfo().getWithOffset(Idx * Stride),
9407  ScalarMemVT, MinAlign(Alignment, Idx * Stride),
9408  SN->getMemOperand()->getFlags(), SN->getAAInfo());
9409  else
9410  Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
9411  SN->getPointerInfo().getWithOffset(Idx * Stride),
9412  MinAlign(Alignment, Idx * Stride),
9413  SN->getMemOperand()->getFlags(), SN->getAAInfo());
9414 
9415  if (Idx == 0 && SN->isIndexed()) {
9416  assert(SN->getAddressingMode() == ISD::PRE_INC &&
9417  "Unknown addressing mode on vector store");
9418  Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
9419  SN->getAddressingMode());
9420  }
9421 
9422  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
9423  DAG.getConstant(Stride, dl,
9424  BasePtr.getValueType()));
9425  Stores[Idx] = Store;
9426  }
9427 
9428  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9429 
9430  if (SN->isIndexed()) {
9431  SDValue RetOps[] = { TF, Stores[0].getValue(1) };
9432  return DAG.getMergeValues(RetOps, dl);
9433  }
9434 
9435  return TF;
9436  }
9437 
9438  assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
9439  assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
9440 
9441  // The values are now known to be -1 (false) or 1 (true). To convert this
9442  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
9443  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
9444  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
9445 
9446  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
9447  // understand how to form the extending load.
9448  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
9449 
9450  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
9451 
9452  // Now convert to an integer and store.
9453  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
9454  DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
9455  Value);
9456 
9457  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9458  int FrameIdx = MFI.CreateStackObject(16, 16, false);
9459  MachinePointerInfo PtrInfo =
9460  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9461  EVT PtrVT = getPointerTy(DAG.getDataLayout());
9462  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9463 
9464  SDValue Ops[] = {StoreChain,
9465  DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
9466  Value, FIdx};
9467  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
9468 
9469  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
9470  dl, VTs, Ops, MVT::v4i32, PtrInfo);
9471 
9472  // Move data into the byte array.
9473  SDValue Loads[4], LoadChains[4];
9474  for (unsigned i = 0; i < 4; ++i) {
9475  unsigned Offset = 4*i;
9476  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
9477  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
9478 
9479  Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
9480  PtrInfo.getWithOffset(Offset));
9481  LoadChains[i] = Loads[i].getValue(1);
9482  }
9483 
9484  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
9485 
9486  SDValue Stores[4];
9487  for (unsigned i = 0; i < 4; ++i) {
9488  SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
9489  Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
9490 
9491  Stores[i] = DAG.getTruncStore(
9492  StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
9493  MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
9494  SN->getAAInfo());
9495  }
9496 
9497  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9498 
9499  return StoreChain;
9500 }
9501 
9502 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
9503  SDLoc dl(Op);
9504  if (Op.getValueType() == MVT::v4i32) {
9505  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
9506 
9507  SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
9508  SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
9509 
9510  SDValue RHSSwap = // = vrlw RHS, 16
9511  BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
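// The full 32-bit product per lane is then reassembled from 16-bit pieces as
//   a*b = a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 16)   (mod 2^32):
// vmulouh below supplies the a_lo*b_lo terms ("low parts"), and vmsumuhm
// applied to LHS and the 16-bit-rotated RHS sums a_lo*b_hi + a_hi*b_lo per
// word, which is shifted up 16 bits before the final add.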
9512 
9513  // Shrinkify inputs to v8i16.
9514  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
9515  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
9516  RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
9517 
9518  // Low parts multiplied together, generating 32-bit results (we ignore the
9519  // top parts).
9520  SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
9521  LHS, RHS, DAG, dl, MVT::v4i32);
9522 
9523  SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
9524  LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
9525  // Shift the high parts up 16 bits.
9526  HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
9527  Neg16, DAG, dl);
9528  return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
9529  } else if (Op.getValueType() == MVT::v8i16) {
9530  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
9531 
9532  SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
9533 
9534  return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
9535  LHS, RHS, Zero, DAG, dl);
9536  } else if (Op.getValueType() == MVT::v16i8) {
9537  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
9538  bool isLittleEndian = Subtarget.isLittleEndian();
9539 
9540  // Multiply the even 8-bit parts, producing 16-bit sums.
9541  SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
9542  LHS, RHS, DAG, dl, MVT::v8i16);
9543  EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
9544 
9545  // Multiply the odd 8-bit parts, producing 16-bit sums.
9546  SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
9547  LHS, RHS, DAG, dl, MVT::v8i16);
9548  OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
9549 
9550  // Merge the results together. Because vmuleub and vmuloub are
9551  // instructions with a big-endian bias, we must reverse the
9552  // element numbering and reverse the meaning of "odd" and "even"
9553  // when generating little endian code.
9554  int Ops[16];
9555  for (unsigned i = 0; i != 8; ++i) {
9556  if (isLittleEndian) {
9557  Ops[i*2 ] = 2*i;
9558  Ops[i*2+1] = 2*i+16;
9559  } else {
9560  Ops[i*2 ] = 2*i+1;
9561  Ops[i*2+1] = 2*i+1+16;
9562  }
9563  }
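// For reference, the loop above yields the byte-shuffle mask
// {0,16,2,18,...,14,30} on little-endian targets (first operand OddParts,
// second EvenParts) and {1,17,3,19,...,15,31} on big-endian targets (first
// operand EvenParts, second OddParts).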
9564  if (isLittleEndian)
9565  return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
9566  else
9567  return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
9568  } else {
9569  llvm_unreachable("Unknown mul to lower!");
9570  }
9571 }
9572 
9573 SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
9574 
9575  assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
9576 
9577  EVT VT = Op.getValueType();
9578  assert(VT.isVector() &&
9579  "Only set vector abs as custom, scalar abs shouldn't reach here!");
9580  assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
9581  VT == MVT::v16i8) &&
9582  "Unexpected vector element type!");
9583  assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
9584  "Current subtarget doesn't support smax v2i64!");
9585 
9586  // For vector abs, it can be lowered to:
9587  // abs x
9588  // ==>
9589  // y = -x
9590  // smax(x, y)
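// For example, with x = <1, -2, 3, -4> this computes y = <-1, 2, -3, 4> and
// smax(x, y) = <1, 2, 3, 4>. As with scalar abs, INT_MIN maps to itself.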
9591 
9592  SDLoc dl(Op);
9593  SDValue X = Op.getOperand(0);
9594  SDValue Zero = DAG.getConstant(0, dl, VT);
9595  SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
9596 
9597  // SMAX patch https://reviews.llvm.org/D47332
9598  // hasn't landed yet, so use intrinsic first here.
9599  // TODO: Should use SMAX directly once SMAX patch landed
9600  Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
9601  if (VT == MVT::v2i64)
9602  BifID = Intrinsic::ppc_altivec_vmaxsd;
9603  else if (VT == MVT::v8i16)
9604  BifID = Intrinsic::ppc_altivec_vmaxsh;
9605  else if (VT == MVT::v16i8)
9606  BifID = Intrinsic::ppc_altivec_vmaxsb;
9607 
9608  return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
9609 }
9610 
9611 /// LowerOperation - Provide custom lowering hooks for some operations.
9612 ///
9613  SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
9614  switch (Op.getOpcode()) {
9615  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
9616  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
9617  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
9618  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
9619  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
9620  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
9621  case ISD::SETCC: return LowerSETCC(Op, DAG);
9622  case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
9623  case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
9624 
9625  // Variable argument lowering.
9626  case ISD::VASTART: return LowerVASTART(Op, DAG);
9627  case ISD::VAARG: return LowerVAARG(Op, DAG);
9628  case ISD::VACOPY: return LowerVACOPY(Op, DAG);
9629 
9630  case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
9631  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
9632  case ISD::GET_DYNAMIC_AREA_OFFSET:
9633  return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
9634 
9635  // Exception handling lowering.
9636  case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
9637  case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
9638  case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
9639 
9640  case ISD::LOAD: return LowerLOAD(Op, DAG);
9641  case ISD::STORE: return LowerSTORE(Op, DAG);
9642  case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
9643  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
9644  case ISD::FP_TO_UINT:
9645  case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
9646  case ISD::UINT_TO_FP:
9647  case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
9648  case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
9649 
9650  // Lower 64-bit shifts.
9651  case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
9652  case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
9653  case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
9654 
9655  // Vector-related lowering.
9656  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
9657  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
9658  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
9659  case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
9660  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
9661  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
9662  case ISD::MUL: return LowerMUL(Op, DAG);
9663  case ISD::ABS: return LowerABS(Op, DAG);
9664 
9665  // For counter-based loop handling.
9666  case ISD::INTRINSIC_W_CHAIN: return SDValue();
9667 
9668  case ISD::BITCAST: return LowerBITCAST(Op, DAG);
9669 
9670  // Frame & Return address.
9671  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
9672  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
9673 
9674  case ISD::INTRINSIC_VOID:
9675  return LowerINTRINSIC_VOID(Op, DAG);
9676  case ISD::SREM:
9677  case ISD::UREM:
9678  return LowerREM(Op, DAG);
9679  case ISD::BSWAP:
9680  return LowerBSWAP(Op, DAG);
9681  case ISD::ATOMIC_CMP_SWAP:
9682  return LowerATOMIC_CMP_SWAP(Op, DAG);
9683  }
9684 }
9685 
9686  void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
9687  SmallVectorImpl<SDValue> &Results,
9688  SelectionDAG &DAG) const {
9689  SDLoc dl(N);
9690  switch (N->getOpcode()) {
9691  default:
9692  llvm_unreachable("Do not know how to custom type legalize this operation!");
9693  case ISD::READCYCLECOUNTER: {
9694  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
9695  SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
9696 
9697  Results.push_back(RTB);
9698  Results.push_back(RTB.getValue(1));
9699  Results.push_back(RTB.getValue(2));
9700  break;
9701  }
9702  case ISD::INTRINSIC_W_CHAIN: {
9703  if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
9704  Intrinsic::ppc_is_decremented_ctr_nonzero)
9705  break;
9706 
9707  assert(N->getValueType(0) == MVT::i1 &&
9708  "Unexpected result type for CTR decrement intrinsic");
9709  EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
9710  N->getValueType(0));
9711  SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
9712  SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
9713  N->getOperand(1));
9714 
9715  Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
9716  Results.push_back(NewInt.getValue(1));
9717  break;
9718  }
9719  case ISD::VAARG: {
9720  if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
9721  return;
9722 
9723  EVT VT = N->getValueType(0);
9724 
9725  if (VT == MVT::i64) {
9726  SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
9727 
9728  Results.push_back(NewNode);
9729  Results.push_back(NewNode.getValue(1));
9730  }
9731  return;
9732  }
9733  case ISD::FP_TO_SINT:
9734  case ISD::FP_TO_UINT:
9735  // LowerFP_TO_INT() can only handle f32 and f64.
9736  if (N->getOperand(0).getValueType() == MVT::ppcf128)
9737  return;
9738  Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
9739  return;
9740  case ISD::BITCAST:
9741  // Don't handle bitcast here.
9742  return;
9743  }
9744 }
9745 
9746 //===----------------------------------------------------------------------===//
9747 // Other Lowering Code
9748 //===----------------------------------------------------------------------===//
9749 
9750  static Instruction *callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
9751  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
9752  Function *Func = Intrinsic::getDeclaration(M, Id);
9753  return Builder.CreateCall(Func, {});
9754 }
9755 
9756 // The mappings for emitLeading/TrailingFence are taken from
9757 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
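// Under that mapping, a sequentially consistent atomic operation gets a full
// sync as its leading fence, release (and stronger) operations get an lwsync,
// and acquire orderings are handled by the trailing fence emitted below.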
9758  Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
9759  Instruction *Inst,
9760  AtomicOrdering Ord) const {
9761  if (Ord == AtomicOrdering::SequentiallyConsistent)
9762  return callIntrinsic(Builder, Intrinsic::ppc_sync);
9763  if (isReleaseOrStronger(Ord))
9764  return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
9765  return nullptr;
9766 }
9767 
9768  Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
9769  Instruction *Inst,
9770  AtomicOrdering Ord) const {
9771  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
9772  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
9773  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
9774  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
9775  if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
9776  return Builder.CreateCall(
9777  Intrinsic::getDeclaration(
9778  Builder.GetInsertBlock()->getParent()->getParent(),
9779  Intrinsic::ppc_cfence, {Inst->getType()}),
9780  {Inst});
9781  // FIXME: Can use isync for rmw operation.
9782  return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
9783  }
9784  return nullptr;
9785 }
9786 
9787  MachineBasicBlock *
9788  PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
9789  unsigned AtomicSize,
9790  unsigned BinOpcode,
9791  unsigned CmpOpcode,
9792  unsigned CmpPred) const {
9793  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
9794  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9795 
9796  auto LoadMnemonic = PPC::LDARX;
9797  auto StoreMnemonic = PPC::STDCX;
9798  switch (AtomicSize) {
9799  default:
9800  llvm_unreachable("Unexpected size of atomic entity");
9801  case 1:
9802  LoadMnemonic = PPC::LBARX;
9803  StoreMnemonic = PPC::STBCX;
9804  assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
9805  break;
9806  case 2:
9807  LoadMnemonic = PPC::LHARX;
9808  StoreMnemonic = PPC::STHCX;
9809  assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
9810  break;
9811  case 4:
9812  LoadMnemonic = PPC::LWARX;
9813  StoreMnemonic = PPC::STWCX;
9814  break;
9815  case 8:
9816  LoadMnemonic = PPC::LDARX;
9817  StoreMnemonic = PPC::STDCX;
9818  break;
9819  }
9820 
9821  const BasicBlock *LLVM_BB = BB->getBasicBlock();
9822  MachineFunction *F = BB->getParent();
9823  MachineFunction::iterator It = ++BB->getIterator();
9824 
9825  unsigned dest = MI.getOperand(0).getReg();
9826  unsigned ptrA = MI.getOperand(1).getReg();
9827  unsigned ptrB = MI.getOperand(2).getReg();
9828  unsigned incr = MI.getOperand(3).getReg();
9829  DebugLoc dl = MI.getDebugLoc();
9830 
9831  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
9832  MachineBasicBlock *loop2MBB =
9833  CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
9834  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9835  F->insert(It, loopMBB);
9836  if (CmpOpcode)
9837  F->insert(It, loop2MBB);
9838  F->insert(It, exitMBB);
9839  exitMBB->splice(exitMBB->begin(), BB,
9840  std::next(MachineBasicBlock::iterator(MI)), BB->end());
9841  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9842 
9843  MachineRegisterInfo &RegInfo = F->getRegInfo();
9844  unsigned TmpReg = (!BinOpcode) ? incr :
9845  RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
9846  : &PPC::GPRCRegClass);
9847 
9848  // thisMBB:
9849  // ...
9850  // fallthrough --> loopMBB
9851  BB->addSuccessor(loopMBB);
9852 
9853  // loopMBB:
9854  // l[wd]arx dest, ptr
9855  // add r0, dest, incr
9856  // st[wd]cx. r0, ptr
9857  // bne- loopMBB
9858  // fallthrough --> exitMBB
9859 
9860  // For max/min...
9861  // loopMBB:
9862  // l[wd]arx dest, ptr
9863  // cmpl?[wd] incr, dest
9864  // bgt exitMBB
9865  // loop2MBB:
9866  // st[wd]cx. dest, ptr
9867  // bne- loopMBB
9868  // fallthrough --> exitMBB
9869 
9870  BB = loopMBB;
9871  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
9872  .addReg(ptrA).addReg(ptrB);
9873  if (BinOpcode)
9874  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
9875  if (CmpOpcode) {
9876  // Signed comparisons of byte or halfword values must be sign-extended.
9877  if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
9878  unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
9879  BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
9880  ExtReg).addReg(dest);
9881  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
9882  .addReg(incr).addReg(ExtReg);
9883  } else
9884  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
9885  .addReg(incr).addReg(dest);
9886 
9887  BuildMI(BB, dl, TII->get(PPC::BCC))
9888  .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
9889  BB->addSuccessor(loop2MBB);
9890  BB->addSuccessor(exitMBB);
9891  BB = loop2MBB;
9892  }
9893  BuildMI(BB, dl, TII->get(StoreMnemonic))
9894  .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
9895  BuildMI(BB, dl, TII->get(PPC::BCC))
9896  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
9897  BB->addSuccessor(loopMBB);
9898  BB->addSuccessor(exitMBB);
9899 
9900  // exitMBB:
9901  // ...
9902  BB = exitMBB;
9903  return BB;
9904 }
9905 
9906  MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
9907  MachineInstr &MI, MachineBasicBlock *BB,
9908  bool is8bit, // operation
9909  unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
9910  // If we support part-word atomic mnemonics, just use them
9911  if (Subtarget.hasPartwordAtomics())
9912  return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
9913  CmpPred);
9914 
9915  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
9916  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9917  // In 64-bit mode we have to use 64 bits for addresses, even though the
9918  // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
9919  // registers without caring whether they're 32 or 64, but here we're
9920  // doing actual arithmetic on the addresses.
9921  bool is64bit = Subtarget.isPPC64();
9922  bool isLittleEndian = Subtarget.isLittleEndian();
9923  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
9924 
9925  const BasicBlock *LLVM_BB = BB->getBasicBlock();
9926  MachineFunction *F = BB->getParent();
9927  MachineFunction::iterator It = ++BB->getIterator();
9928 
9929  unsigned dest = MI.getOperand(0).getReg();
9930  unsigned ptrA = MI.getOperand(1).getReg();
9931  unsigned ptrB = MI.getOperand(2).getReg();
9932  unsigned incr = MI.getOperand(3).getReg();
9933  DebugLoc dl = MI.getDebugLoc();
9934 
9935  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
9936  MachineBasicBlock *loop2MBB =
9937  CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
9938  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9939  F->insert(It, loopMBB);
9940  if (CmpOpcode)
9941  F->insert(It, loop2MBB);
9942  F->insert(It, exitMBB);
9943  exitMBB->splice(exitMBB->begin(), BB,
9944  std::next(MachineBasicBlock::iterator(MI)), BB->end());
9945  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9946 
9947  MachineRegisterInfo &RegInfo = F->getRegInfo();
9948  const TargetRegisterClass *RC =
9949  is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
9950  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
9951 
9952  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
9953  unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
9954  unsigned ShiftReg =
9955  isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
9956  unsigned Incr2Reg = RegInfo.createVirtualRegister(GPRC);
9957  unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
9958  unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
9959  unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
9960  unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
9961  unsigned Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
9962  unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
9963  unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
9964  unsigned Ptr1Reg;
9965  unsigned TmpReg =
9966  (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
9967 
9968  // thisMBB:
9969  // ...
9970  // fallthrough --> loopMBB
9971  BB->addSuccessor(loopMBB);
9972 
9973  // The 4-byte load must be aligned, while a char or short may be
9974  // anywhere in the word. Hence all this nasty bookkeeping code.
9975  // add ptr1, ptrA, ptrB [copy if ptrA==0]
9976  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
9977  // xori shift, shift1, 24 [16]
9978  // rlwinm ptr, ptr1, 0, 0, 29
9979  // slw incr2, incr, shift
9980  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
9981  // slw mask, mask2, shift
9982  // loopMBB:
9983  // lwarx tmpDest, ptr
9984  // add tmp, tmpDest, incr2
9985  // andc tmp2, tmpDest, mask
9986  // and tmp3, tmp, mask
9987  // or tmp4, tmp3, tmp2
9988  // stwcx. tmp4, ptr
9989  // bne- loopMBB
9990  // fallthrough --> exitMBB
9991  // srw dest, tmpDest, shift
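// As a concrete example, for a byte whose address has (ptr1 & 3) == 1 the
// rlwinm gives shift1 = 8; a little-endian target uses shift = 8 directly,
// while a big-endian target XORs with 24 to get shift = 16, since byte 1
// occupies bits 16-23 of the containing big-endian word.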
9992  if (ptrA != ZeroReg) {
9993  Ptr1Reg = RegInfo.createVirtualRegister(RC);
9994  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
9995  .addReg(ptrA)
9996  .addReg(ptrB);
9997  } else {
9998  Ptr1Reg = ptrB;
9999  }
10000  // We need to use a 32-bit subregister here to avoid a register class
10001  // mismatch in 64-bit mode.
10002  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
10003  .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
10004  .addImm(3)
10005  .addImm(27)
10006  .addImm(is8bit ? 28 : 27);
10007  if (!isLittleEndian)
10008  BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
10009  .addReg(Shift1Reg)
10010  .addImm(is8bit ? 24 : 16);
10011  if (is64bit)
10012  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
10013  .addReg(Ptr1Reg)
10014  .addImm(0)
10015  .addImm(61);
10016  else
10017  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
10018  .addReg(Ptr1Reg)
10019  .addImm(0)
10020  .addImm(0)
10021  .addImm(29);
10022  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
10023  if (is8bit)
10024  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
10025  else {
10026  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
10027  BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
10028  .addReg(Mask3Reg)
10029  .addImm(65535);
10030  }
10031  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
10032  .addReg(Mask2Reg)
10033  .addReg(ShiftReg);
10034 
10035  BB = loopMBB;
10036  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
10037  .addReg(ZeroReg)
10038  .addReg(PtrReg);
10039  if (BinOpcode)
10040  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
10041  .addReg(Incr2Reg)
10042  .addReg(TmpDestReg);
10043  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
10044  .addReg(TmpDestReg)
10045  .addReg(MaskReg);
10046  BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
10047  if (CmpOpcode) {
10048  // For unsigned comparisons, we can directly compare the shifted values.
10049  // For signed comparisons we shift and sign extend.
10050  unsigned SReg = RegInfo.createVirtualRegister(GPRC);
10051  BuildMI(BB, dl, TII->get(PPC::AND), SReg)
10052  .addReg(TmpDestReg)
10053  .addReg(MaskReg);
10054  unsigned ValueReg = SReg;
10055  unsigned CmpReg = Incr2Reg;
10056  if (CmpOpcode == PPC::CMPW) {
10057  ValueReg = RegInfo.createVirtualRegister(GPRC);
10058  BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
10059  .addReg(SReg)
10060  .addReg(ShiftReg);
10061  unsigned ValueSReg = RegInfo.createVirtualRegister(GPRC);
10062  BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
10063  .addReg(ValueReg);
10064  ValueReg = ValueSReg;
10065  CmpReg = incr;
10066  }
10067  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10068  .addReg(CmpReg)
10069  .addReg(ValueReg);
10070  BuildMI(BB, dl, TII->get(PPC::BCC))
10071  .addImm(CmpPred)
10072  .addReg(PPC::CR0)
10073  .addMBB(exitMBB);
10074  BB->addSuccessor(loop2MBB);
10075  BB->addSuccessor(exitMBB);
10076  BB = loop2MBB;
10077  }
10078  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
10079  BuildMI(BB, dl, TII->get(PPC::STWCX))
10080  .addReg(Tmp4Reg)
10081  .addReg(ZeroReg)
10082  .addReg(PtrReg);
10083  BuildMI(BB, dl, TII->get(PPC::BCC))
10084  .addImm(PPC::PRED_NE)
10085  .addReg(PPC::CR0)
10086  .addMBB(loopMBB);
10087  BB->addSuccessor(loopMBB);
10088  BB->addSuccessor(exitMBB);
10089 
10090  // exitMBB:
10091  // ...
10092  BB = exitMBB;
10093  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
10094  .addReg(TmpDestReg)
10095  .addReg(ShiftReg);
10096  return BB;
10097 }
10098 
10099  MachineBasicBlock *
10100  PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
10101  MachineBasicBlock *MBB) const {
10102  DebugLoc DL = MI.getDebugLoc();
10103  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10104  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
10105 
10106  MachineFunction *MF = MBB->getParent();
10107  MachineRegisterInfo &MRI = MF->getRegInfo();
10108 
10109  const BasicBlock *BB = MBB->getBasicBlock();
10110  MachineFunction::iterator I = ++MBB->getIterator();
10111 
10112  unsigned DstReg = MI.getOperand(0).getReg();
10113  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
10114  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
10115  unsigned mainDstReg = MRI.createVirtualRegister(RC);
10116  unsigned restoreDstReg = MRI.createVirtualRegister(RC);
10117 
10118  MVT PVT = getPointerTy(MF->getDataLayout());
10119  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
10120  "Invalid Pointer Size!");
10121  // For v = setjmp(buf), we generate
10122  //
10123  // thisMBB:
10124  // SjLjSetup mainMBB
10125  // bl mainMBB
10126  // v_restore = 1
10127  // b sinkMBB
10128  //
10129  // mainMBB:
10130  // buf[LabelOffset] = LR
10131  // v_main = 0
10132  //
10133  // sinkMBB:
10134  // v = phi(main, restore)
10135  //
10136 
10137  MachineBasicBlock *thisMBB = MBB;
10138  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
10139  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
10140  MF->insert(I, mainMBB);
10141  MF->insert(I, sinkMBB);
10142 
10143  MachineInstrBuilder MIB;
10144 
10145  // Transfer the remainder of BB and its successor edges to sinkMBB.
10146  sinkMBB->splice(sinkMBB->begin(), MBB,
10147  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
10148  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
10149 
10150  // Note that the structure of the jmp_buf used here is not compatible
10151  // with that used by libc, and is not designed to be. Specifically, it
10152  // stores only those 'reserved' registers that LLVM does not otherwise
10153  // understand how to spill. Also, by convention, by the time this
10154  // intrinsic is called, Clang has already stored the frame address in the
10155  // first slot of the buffer and stack address in the third. Following the
10156  // X86 target code, we'll store the jump address in the second slot. We also
10157  // need to save the TOC pointer (R2) to handle jumps between shared
10158  // libraries, and that will be stored in the fourth slot. The thread
10159  // identifier (R13) is not affected.
10160 
10161  // thisMBB:
10162  const int64_t LabelOffset = 1 * PVT.getStoreSize();
10163  const int64_t TOCOffset = 3 * PVT.getStoreSize();
10164  const int64_t BPOffset = 4 * PVT.getStoreSize();
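// Measured in PVT.getStoreSize() units, the resulting buffer layout is:
// slot 0 = frame address (stored by Clang, per the comment above), slot 1 =
// jump address (LR), slot 2 = stack pointer, slot 3 = TOC pointer (r2),
// slot 4 = base pointer.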
10165 
10166  // Prepare the IP in a register.
10167  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
10168  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
10169  unsigned BufReg = MI.getOperand(1).getReg();
10170 
10171  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
10172  setUsesTOCBasePtr(*MBB->getParent());
10173  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
10174  .addReg(PPC::X2)
10175  .addImm(TOCOffset)
10176  .addReg(BufReg)
10177  .cloneMemRefs(MI);
10178  }
10179 
10180  // Naked functions never have a base pointer, and so we use r1. For all
10181  // other functions, this decision must be delayed until during PEI.
10182  unsigned BaseReg;
10183  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
10184  BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
10185  else
10186  BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
10187 
10188  MIB = BuildMI(*thisMBB, MI, DL,
10189  TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
10190  .addReg(BaseReg)
10191  .addImm(BPOffset)
10192  .addReg(BufReg)
10193  .cloneMemRefs(MI);
10194 
10195  // Setup
10196  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
10197  MIB.addRegMask(TRI->getNoPreservedMask());
10198 
10199  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
10200 
10201  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
10202  .addMBB(mainMBB);
10203  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
10204 
10205  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
10206  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
10207 
10208  // mainMBB:
10209  // mainDstReg = 0
10210  MIB =
10211  BuildMI(mainMBB, DL,
10212  TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
10213 
10214  // Store IP
10215  if (Subtarget.isPPC64()) {
10216  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
10217  .addReg(LabelReg)
10218  .addImm(LabelOffset)
10219  .addReg(BufReg);
10220  } else {
10221  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
10222  .addReg(LabelReg)
10223  .addImm(LabelOffset)
10224  .addReg(BufReg);
10225  }
10226  MIB.cloneMemRefs(MI);
10227 
10228  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
10229  mainMBB->addSuccessor(sinkMBB);
10230 
10231  // sinkMBB:
10232  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
10233  TII->get(PPC::PHI), DstReg)
10234  .addReg(mainDstReg).addMBB(mainMBB)
10235  .addReg(restoreDstReg).addMBB(thisMBB);
10236 
10237  MI.eraseFromParent();
10238  return sinkMBB;
10239 }
10240 
10241  MachineBasicBlock *
10242  PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
10243  MachineBasicBlock *MBB) const {
10244  DebugLoc DL = MI.getDebugLoc();
10245  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10246 
10247  MachineFunction *MF = MBB->getParent();
10248  MachineRegisterInfo &MRI = MF->getRegInfo();
10249 
10250  MVT PVT = getPointerTy(MF->getDataLayout());
10251  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
10252  "Invalid Pointer Size!");
10253 
10254  const TargetRegisterClass *RC =
10255  (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
10256  unsigned Tmp = MRI.createVirtualRegister(RC);
10257  // Since FP is only updated here but NOT referenced, it's treated as GPR.
10258  unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
10259  unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
10260  unsigned BP =
10261  (PVT == MVT::i64)
10262  ? PPC::X30
10263  : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
10264  : PPC::R30);
10265 
10266  MachineInstrBuilder MIB;
10267 
10268  const int64_t LabelOffset = 1 * PVT.getStoreSize();
10269  const int64_t SPOffset = 2 * PVT.getStoreSize();
10270  const int64_t TOCOffset = 3 * PVT.getStoreSize();
10271  const int64_t BPOffset = 4 * PVT.getStoreSize();
10272 
10273  unsigned BufReg = MI.getOperand(0).getReg();
10274 
10275  // Reload FP (the jumped-to function may not have had a
10276  // frame pointer, and if so, then its r31 will be restored
10277  // as necessary).
10278  if (PVT == MVT::i64) {
10279  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
10280  .addImm(0)
10281  .addReg(BufReg);
10282  } else {
10283  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
10284  .addImm(0)
10285  .addReg(BufReg);
10286  }
10287  MIB.cloneMemRefs(MI);
10288 
10289  // Reload IP
10290  if (PVT == MVT::i64) {
10291  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
10292  .addImm(LabelOffset)
10293  .addReg(BufReg);
10294  } else {
10295  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
10296  .addImm(LabelOffset)
10297  .addReg(BufReg);
10298  }
10299  MIB.cloneMemRefs(MI);
10300 
10301  // Reload SP
10302  if (PVT == MVT::i64) {
10303  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
10304  .addImm(SPOffset)
10305  .addReg(BufReg);
10306  } else {
10307  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
10308  .addImm(SPOffset)
10309  .addReg(BufReg);
10310  }
10311  MIB.cloneMemRefs(MI);
10312 
10313  // Reload BP
10314  if (PVT == MVT::i64) {
10315  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
10316  .addImm(BPOffset)
10317  .addReg(BufReg);
10318  } else {
10319  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
10320  .addImm(BPOffset)
10321  .addReg(BufReg);
10322  }
10323  MIB.cloneMemRefs(MI);
10324 
10325  // Reload TOC
10326  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
10327  setUsesTOCBasePtr(*MBB->getParent());
10328  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
10329  .addImm(TOCOffset)
10330  .addReg(BufReg)
10331  .cloneMemRefs(MI);
10332  }
10333 
10334  // Jump
10335  BuildMI(*MBB, MI, DL,
10336  TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
10337  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
10338 
10339  MI.eraseFromParent();
10340  return MBB;
10341 }
10342 
10343  MachineBasicBlock *
10344  PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
10345  MachineBasicBlock *BB) const {
10346  if (MI.getOpcode() == TargetOpcode::STACKMAP ||
10347  MI.getOpcode() == TargetOpcode::PATCHPOINT) {
10348  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
10349  MI.getOpcode() == TargetOpcode::PATCHPOINT) {
10350  // Call lowering should have added an r2 operand to indicate a dependence
10351  // on the TOC base pointer value. It can't however, because there is no
10352  // way to mark the dependence as implicit there, and so the stackmap code
10353  // will confuse it with a regular operand. Instead, add the dependence
10354  // here.
10355  MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
10356  }
10357 
10358  return emitPatchPoint(MI, BB);
10359  }
10360 
10361  if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
10362  MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
10363  return emitEHSjLjSetJmp(MI, BB);
10364  } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
10365  MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
10366  return emitEHSjLjLongJmp(MI, BB);
10367  }
10368 
10369  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10370 
10371  // To "insert" these instructions we actually have to insert their
10372  // control-flow patterns.
10373  const BasicBlock *LLVM_BB = BB->getBasicBlock();
10374  MachineFunction::iterator It = ++BB->getIterator();
10375 
10376  MachineFunction *F = BB->getParent();
10377 
10378  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10379  MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
10380  MI.getOpcode() == PPC::SELECT_I8) {
10381  SmallVector<MachineOperand, 2> Cond;
10382  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10383  MI.getOpcode() == PPC::SELECT_CC_I8)
10384  Cond.push_back(MI.getOperand(4));
10385  else
10386  Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
10387  Cond.push_back(MI.getOperand(1));
10388 
10389  DebugLoc dl = MI.getDebugLoc();
10390  TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
10391  MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
10392  } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10393  MI.getOpcode() == PPC::SELECT_CC_I8 ||
10394  MI.getOpcode() == PPC::SELECT_CC_F4 ||
10395  MI.getOpcode() == PPC::SELECT_CC_F8 ||
10396  MI.getOpcode() == PPC::SELECT_CC_F16 ||
10397  MI.getOpcode() == PPC::SELECT_CC_QFRC ||
10398  MI.getOpcode() == PPC::SELECT_CC_QSRC ||
10399  MI.getOpcode() == PPC::SELECT_CC_QBRC ||
10400  MI.getOpcode() == PPC::SELECT_CC_VRRC ||
10401  MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
10402  MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
10403  MI.getOpcode() == PPC::SELECT_CC_VSRC ||
10404  MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
10405  MI.getOpcode() == PPC::SELECT_CC_SPE ||
10406  MI.getOpcode() == PPC::SELECT_I4 ||
10407  MI.getOpcode() == PPC::SELECT_I8 ||
10408  MI.getOpcode() == PPC::SELECT_F4 ||
10409  MI.getOpcode() == PPC::SELECT_F8 ||
10410  MI.getOpcode() == PPC::SELECT_F16 ||
10411  MI.getOpcode() == PPC::SELECT_QFRC ||
10412  MI.getOpcode() == PPC::SELECT_QSRC ||
10413  MI.getOpcode() == PPC::SELECT_QBRC ||
10414  MI.getOpcode() == PPC::SELECT_SPE ||
10415  MI.getOpcode() == PPC::SELECT_SPE4 ||
10416  MI.getOpcode() == PPC::SELECT_VRRC ||
10417  MI.getOpcode() == PPC::SELECT_VSFRC ||
10418  MI.getOpcode() == PPC::SELECT_VSSRC ||
10419  MI.getOpcode() == PPC::SELECT_VSRC) {
10420  // The incoming instruction knows the destination vreg to set, the
10421  // condition code register to branch on, the true/false values to
10422  // select between, and a branch opcode to use.
10423 
10424  // thisMBB:
10425  // ...
10426  // TrueVal = ...
10427  // cmpTY ccX, r1, r2
10428  // bCC copy1MBB
10429  // fallthrough --> copy0MBB
10430  MachineBasicBlock *thisMBB = BB;
10431  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
10432  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
10433  DebugLoc dl = MI.getDebugLoc();
10434  F->insert(It, copy0MBB);
10435  F->insert(It, sinkMBB);
10436 
10437  // Transfer the remainder of BB and its successor edges to sinkMBB.
10438  sinkMBB->splice(sinkMBB->begin(), BB,
10439  std::next(MachineBasicBlock::iterator(MI)), BB->end());
10440  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
10441 
10442  // Next, add the true and fallthrough blocks as its successors.
10443  BB->addSuccessor(copy0MBB);
10444  BB->addSuccessor(sinkMBB);
10445 
10446  if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
10447  MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
10448  MI.getOpcode() == PPC::SELECT_F16 ||
10449  MI.getOpcode() == PPC::SELECT_SPE4 ||
10450  MI.getOpcode() == PPC::SELECT_SPE ||
10451  MI.getOpcode() == PPC::SELECT_QFRC ||
10452  MI.getOpcode() == PPC::SELECT_QSRC ||
10453  MI.getOpcode() == PPC::SELECT_QBRC ||
10454  MI.getOpcode() == PPC::SELECT_VRRC ||
10455  MI.getOpcode() == PPC::SELECT_VSFRC ||
10456  MI.getOpcode() == PPC::SELECT_VSSRC ||
10457  MI.getOpcode() == PPC::SELECT_VSRC) {
10458  BuildMI(BB, dl, TII->get(PPC::BC))
10459  .addReg(MI.getOperand(1).getReg())
10460  .addMBB(sinkMBB);
10461  } else {
10462  unsigned SelectPred = MI.getOperand(4).getImm();
10463  BuildMI(BB, dl, TII->get(PPC::BCC))
10464  .addImm(SelectPred)
10465  .addReg(MI.getOperand(1).getReg())
10466  .addMBB(sinkMBB);
10467  }
10468 
10469  // copy0MBB:
10470  // %FalseValue = ...
10471  // # fallthrough to sinkMBB
10472  BB = copy0MBB;
10473 
10474  // Update machine-CFG edges
10475  BB->addSuccessor(sinkMBB);
10476 
10477  // sinkMBB:
10478  // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
10479  // ...
10480  BB = sinkMBB;
10481  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
10482  .addReg(MI.getOperand(3).getReg())
10483  .addMBB(copy0MBB)
10484  .addReg(MI.getOperand(2).getReg())
10485  .addMBB(thisMBB);
10486  } else if (MI.getOpcode() == PPC::ReadTB) {
10487  // To read the 64-bit time-base register on a 32-bit target, we read the
10488  // two halves. Should the counter have wrapped while it was being read, we
10489  // need to try again.
10490  // ...
10491  // readLoop:
10492  // mfspr Rx,TBU # load from TBU
10493  // mfspr Ry,TB # load from TB
10494  // mfspr Rz,TBU # load from TBU
10495  // cmpw crX,Rx,Rz # check if 'old'='new'
10496  // bne readLoop # branch if they're not equal
10497  // ...
10498 
10499  MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
10500  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
10501  DebugLoc dl = MI.getDebugLoc();
10502  F->insert(It, readMBB);
10503  F->insert(It, sinkMBB);
10504 
10505  // Transfer the remainder of BB and its successor edges to sinkMBB.
10506  sinkMBB->splice(sinkMBB->begin(), BB,
10507  std::next(MachineBasicBlock::iterator(MI)), BB->end());
10508  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
10509 
10510  BB->addSuccessor(readMBB);
10511  BB = readMBB;
10512 
10513  MachineRegisterInfo &RegInfo = F->getRegInfo();
10514  unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10515  unsigned LoReg = MI.getOperand(0).getReg();
10516  unsigned HiReg = MI.getOperand(1).getReg();
10517 
10518  BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
10519  BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
10520  BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
10521 
10522  unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
10523 
10524  BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
10525  .addReg(HiReg)
10526  .addReg(ReadAgainReg);
10527  BuildMI(BB, dl, TII->get(PPC::BCC))
10528  .addImm(PPC::PRED_NE)
10529  .addReg(CmpReg)
10530  .addMBB(readMBB);
10531 
10532  BB->addSuccessor(readMBB);
10533  BB->addSuccessor(sinkMBB);
10534  } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
10535  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
10536  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
10537  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
10538  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
10539  BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
10540  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
10541  BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
10542 
10543  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
10544  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
10545  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
10546  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
10547  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
10548  BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
10549  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
10550  BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
10551 
10552  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
10553  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
10554  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
10555  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
10556  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
10557  BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
10558  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
10559  BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
10560 
10561  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
10562  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
10563  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
10564  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
10565  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
10566  BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
10567  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
10568  BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
10569 
10570  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
10571  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
10572  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
10573  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
10574  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
10575  BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
10576  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
10577  BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
10578 
10579  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
10580  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
10581  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
10582  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
10583  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
10584  BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
10585  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
10586  BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
10587 
10588  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
10589  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
10590  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
10591  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
10592  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
10593  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
10594  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
10595  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
10596 
10597  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
10598  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
10599  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
10600  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
10601  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
10602  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
10603  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
10604  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
10605 
10606  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
10607  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
10608  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
10609  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
10610  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
10611  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
10612  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
10613  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
10614 
10615  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
10616  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
10617  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
10618  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
10619  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
10620  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
10621  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
10622  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
10623 
10624  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
10625  BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
10626  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
10627  BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
10628  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
10629  BB = EmitAtomicBinary(MI, BB, 4, 0);
10630  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
10631  BB = EmitAtomicBinary(MI, BB, 8, 0);
10632  else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
10633  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
10634  (Subtarget.hasPartwordAtomics() &&
10635  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
10636  (Subtarget.hasPartwordAtomics() &&
10637  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
10638  bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
10639 
10640  auto LoadMnemonic = PPC::LDARX;
10641  auto StoreMnemonic = PPC::STDCX;
10642  switch (MI.getOpcode()) {
10643  default:
10644  llvm_unreachable("Compare and swap of unknown size");
10645  case PPC::ATOMIC_CMP_SWAP_I8:
10646  LoadMnemonic = PPC::LBARX;
10647  StoreMnemonic = PPC::STBCX;
10648  assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
10649  break;
10650  case PPC::ATOMIC_CMP_SWAP_I16:
10651  LoadMnemonic = PPC::LHARX;
10652  StoreMnemonic = PPC::STHCX;
10653  assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
10654  break;
10655  case PPC::ATOMIC_CMP_SWAP_I32:
10656  LoadMnemonic = PPC::LWARX;
10657  StoreMnemonic = PPC::STWCX;
10658  break;
10659  case PPC::ATOMIC_CMP_SWAP_I64:
10660  LoadMnemonic = PPC::LDARX;
10661  StoreMnemonic = PPC::STDCX;
10662  break;
10663  }
10664  unsigned dest = MI.getOperand(0).getReg();
10665  unsigned ptrA = MI.getOperand(1).getReg();
10666  unsigned ptrB = MI.getOperand(2).getReg();
10667  unsigned oldval = MI.getOperand(3).getReg();
10668  unsigned newval = MI.getOperand(4).getReg();
10669  DebugLoc dl = MI.getDebugLoc();
10670 
10671  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
10672  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
10673  MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
10674  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10675  F->insert(It, loop1MBB);
10676  F->insert(It, loop2MBB);
10677  F->insert(It, midMBB);
10678  F->insert(It, exitMBB);
10679  exitMBB->splice(exitMBB->begin(), BB,
10680  std::next(MachineBasicBlock::iterator(MI)), BB->end());
10681  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10682 
10683  // thisMBB:
10684  // ...
10685  // fallthrough --> loopMBB
10686  BB->addSuccessor(loop1MBB);
10687 
10688  // loop1MBB:
10689  // l[bhwd]arx dest, ptr
10690  // cmp[wd] dest, oldval
10691  // bne- midMBB
10692  // loop2MBB:
10693  // st[bhwd]cx. newval, ptr
10694  // bne- loopMBB
10695  // b exitBB
10696  // midMBB:
10697  // st[bhwd]cx. dest, ptr
10698  // exitBB:
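// Note that the st[bhwd]cx. in midMBB writes back the value that was just
// loaded, so memory is unchanged on the failed-comparison path; its purpose
// is presumably to release the reservation created by the l[bhwd]arx.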
10699  BB = loop1MBB;
10700  BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
10701  BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
10702  .addReg(oldval)
10703  .addReg(dest);
10704  BuildMI(BB, dl, TII->get(PPC::BCC))
10705  .addImm(PPC::PRED_NE)
10706  .addReg(PPC::CR0)
10707  .addMBB(midMBB);
10708  BB->addSuccessor(loop2MBB);
10709  BB->addSuccessor(midMBB);
10710 
10711  BB = loop2MBB;
10712  BuildMI(BB, dl, TII->get(StoreMnemonic))
10713  .addReg(newval)
10714  .addReg(ptrA)
10715  .addReg(ptrB);
10716  BuildMI(BB, dl, TII->get(PPC::BCC))
10717  .addImm(PPC::PRED_NE)
10718  .addReg(PPC::CR0)
10719  .addMBB(loop1MBB);
10720  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
10721  BB->addSuccessor(loop1MBB);
10722  BB->addSuccessor(exitMBB);
10723 
10724  BB = midMBB;
10725  BuildMI(BB, dl, TII->get(StoreMnemonic))
10726  .addReg(dest)
10727  .addReg(ptrA)
10728  .addReg(ptrB);
10729  BB->addSuccessor(exitMBB);
10730 
10731  // exitMBB:
10732  // ...
10733  BB = exitMBB;
10734  } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
10735  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
10736  // We must use 64-bit registers for addresses when targeting 64-bit,
10737  // since we're actually doing arithmetic on them. Other registers
10738  // can be 32-bit.
10739  bool is64bit = Subtarget.isPPC64();
10740  bool isLittleEndian = Subtarget.isLittleEndian();
10741  bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
10742 
10743  unsigned dest = MI.getOperand(0).getReg();
10744  unsigned ptrA = MI.getOperand(1).getReg();
10745  unsigned ptrB = MI.getOperand(2).getReg();
10746  unsigned oldval = MI.getOperand(3).getReg();
10747  unsigned newval = MI.getOperand(4).getReg();
10748  DebugLoc dl = MI.getDebugLoc();
10749 
10750  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
10751  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
10752  MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
10753  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10754  F->insert(It, loop1MBB);
10755  F->insert(It, loop2MBB);
10756  F->insert(It, midMBB);
10757  F->insert(It, exitMBB);
10758  exitMBB->splice(exitMBB->begin(), BB,
10759  std::next(MachineBasicBlock::iterator(MI)), BB->end());
10760  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10761 
10762  MachineRegisterInfo &RegInfo = F->getRegInfo();
10763  const TargetRegisterClass *RC =
10764  is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
10765  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
10766 
10767  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
10768  unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
10769  unsigned ShiftReg =
10770  isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
10771  unsigned NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
10772  unsigned NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
10773  unsigned OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
10774  unsigned OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
10775  unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
10776  unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
10777  unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
10778  unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
10779  unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
10780  unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
10781  unsigned Ptr1Reg;
10782  unsigned TmpReg = RegInfo.createVirtualRegister(GPRC);
10783  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
10784  // thisMBB:
10785  // ...
10786  // fallthrough --> loopMBB
10787  BB->addSuccessor(loop1MBB);
10788 
10789  // The 4-byte load must be aligned, while a char or short may be
10790  // anywhere in the word. Hence all this nasty bookkeeping code.
10791  // add ptr1, ptrA, ptrB [copy if ptrA==0]
10792  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
10793  // xori shift, shift1, 24 [16]
10794  // rlwinm ptr, ptr1, 0, 0, 29
10795  // slw newval2, newval, shift
10796  // slw oldval2, oldval,shift
10797  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
10798  // slw mask, mask2, shift
10799  // and newval3, newval2, mask
10800  // and oldval3, oldval2, mask
10801  // loop1MBB:
10802  // lwarx tmpDest, ptr
10803  // and tmp, tmpDest, mask
10804  // cmpw tmp, oldval3
10805  // bne- midMBB
10806  // loop2MBB:
10807  // andc tmp2, tmpDest, mask
10808  // or tmp4, tmp2, newval3
10809  // stwcx. tmp4, ptr
10810  // bne- loop1MBB
10811  // b exitBB
10812  // midMBB:
10813  // stwcx. tmpDest, ptr
10814  // exitBB:
10815  // srw dest, tmpDest, shift
10816  if (ptrA != ZeroReg) {
10817  Ptr1Reg = RegInfo.createVirtualRegister(RC);
10818  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
10819  .addReg(ptrA)
10820  .addReg(ptrB);
10821  } else {
10822  Ptr1Reg = ptrB;
10823  }
10824 
10825  // We need to use a 32-bit subregister here to avoid a register class
10826  // mismatch in 64-bit mode.
10827  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
10828  .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
10829  .addImm(3)
10830  .addImm(27)
10831  .addImm(is8bit ? 28 : 27);
10832  if (!isLittleEndian)
10833  BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
10834  .addReg(Shift1Reg)
10835  .addImm(is8bit ? 24 : 16);
10836  if (is64bit)
10837  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
10838  .addReg(Ptr1Reg)
10839  .addImm(0)
10840  .addImm(61);
10841  else
10842  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
10843  .addReg(Ptr1Reg)
10844  .addImm(0)
10845  .addImm(0)
10846  .addImm(29);
10847  BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
10848  .addReg(newval)
10849  .addReg(ShiftReg);
10850  BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
10851  .addReg(oldval)
10852  .addReg(ShiftReg);
10853  if (is8bit)
10854  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
10855  else {
10856  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
10857  BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
10858  .addReg(Mask3Reg)
10859  .addImm(65535);
10860  }
10861  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
10862  .addReg(Mask2Reg)
10863  .addReg(ShiftReg);
10864  BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
10865  .addReg(NewVal2Reg)
10866  .addReg(MaskReg);
10867  BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
10868  .addReg(OldVal2Reg)
10869  .addReg(MaskReg);
10870 
10871  BB = loop1MBB;
10872  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
10873  .addReg(ZeroReg)
10874  .addReg(PtrReg);
10875  BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
10876  .addReg(TmpDestReg)
10877  .addReg(MaskReg);
10878  BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
10879  .addReg(TmpReg)
10880  .addReg(OldVal3Reg);
10881  BuildMI(BB, dl, TII->get(PPC::BCC))
10882  .addImm(PPC::PRED_NE)
10883  .addReg(PPC::CR0)
10884  .addMBB(midMBB);
10885  BB->addSuccessor(loop2MBB);
10886  BB->addSuccessor(midMBB);
10887 
10888  BB = loop2MBB;
10889  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
10890  .addReg(TmpDestReg)
10891  .addReg(MaskReg);
10892  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
10893  .addReg(Tmp2Reg)
10894  .addReg(NewVal3Reg);
10895  BuildMI(BB, dl, TII->get(PPC::STWCX))
10896  .addReg(Tmp4Reg)
10897  .addReg(ZeroReg)
10898  .addReg(PtrReg);
10899  BuildMI(BB, dl, TII->get(PPC::BCC))
10900  .addImm(PPC::PRED_NE)
10901  .addReg(PPC::CR0)
10902  .addMBB(loop1MBB);
10903  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
10904  BB->addSuccessor(loop1MBB);
10905  BB->addSuccessor(exitMBB);
10906 
10907  BB = midMBB;
10908  BuildMI(BB, dl, TII->get(PPC::STWCX))
10909  .addReg(TmpDestReg)
10910  .addReg(ZeroReg)
10911  .addReg(PtrReg);
10912  BB->addSuccessor(exitMBB);
10913 
10914  // exitMBB:
10915  // ...
10916  BB = exitMBB;
10917  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
10918  .addReg(TmpReg)
10919  .addReg(ShiftReg);
10920  } else if (MI.getOpcode() == PPC::FADDrtz) {
10921  // This pseudo performs an FADD with rounding mode temporarily forced
10922  // to round-to-zero. We emit this via custom inserter since the FPSCR
10923  // is not modeled at the SelectionDAG level.
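// (In the FPSCR, bits 30-31 form the RN rounding-mode field; the mtfsb1 31 /
// mtfsb0 30 pair below sets RN = 0b01, i.e. round toward zero.)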
10924  unsigned Dest = MI.getOperand(0).getReg();
10925  unsigned Src1 = MI.getOperand(1).getReg();
10926  unsigned Src2 = MI.getOperand(2).getReg();
10927  DebugLoc dl = MI.getDebugLoc();
10928 
10929  MachineRegisterInfo &RegInfo = F->getRegInfo();
10930  unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
10931 
10932  // Save FPSCR value.
10933  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
10934 
10935  // Set rounding mode to round-to-zero.
10936  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
10937  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
10938 
10939  // Perform addition.
10940  BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
10941 
10942  // Restore FPSCR value.
10943  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
10944  } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
10945  MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
10946  MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
10947  MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
10948  unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
10949  MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
10950  ? PPC::ANDIo8
10951  : PPC::ANDIo;
10952  bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
10953  MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
10954 
10955  MachineRegisterInfo &RegInfo = F->getRegInfo();
10956  unsigned Dest = RegInfo.createVirtualRegister(
10957  Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
10958 
10959  DebugLoc dl = MI.getDebugLoc();
10960  BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
10961  .addReg(MI.getOperand(1).getReg())
10962  .addImm(1);
10963  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
10964  MI.getOperand(0).getReg())
10965  .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
10966  } else if (MI.getOpcode() == PPC::TCHECK_RET) {
10967  DebugLoc Dl = MI.getDebugLoc();
10968  MachineRegisterInfo &RegInfo = F->getRegInfo();
10969  unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
10970  BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
10971  return BB;
10972  } else {
10973  llvm_unreachable("Unexpected instr type to insert");
10974  }
10975 
10976  MI.eraseFromParent(); // The pseudo instruction is gone now.
10977  return BB;
10978 }
10979 
10980 //===----------------------------------------------------------------------===//
10981 // Target Optimization Hooks
10982 //===----------------------------------------------------------------------===//
10983 
10984 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
10985  // For the estimates, convergence is quadratic, so we essentially double the
10986  // number of digits correct after every iteration. For both FRE and FRSQRTE,
10987  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
10988  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
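// For example, without hasRecipPrec() the relative error improves roughly as
// 2^-5 -> 2^-10 -> 2^-20 -> 2^-40, so three steps suffice for f32 and the
// extra step added below for f64 reaches about 2^-80; with hasRecipPrec(),
// one step (2^-14 -> 2^-28) covers f32 and two steps (about 2^-56) cover f64.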
10989  int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
10990  if (VT.getScalarType() == MVT::f64)
10991  RefinementSteps++;
10992  return RefinementSteps;
10993 }
10994 
10995 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
10996  int Enabled, int &RefinementSteps,
10997  bool &UseOneConstNR,
10998  bool Reciprocal) const {
10999  EVT VT = Operand.getValueType();
11000  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
11001  (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
11002  (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
11003  (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
11004  (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
11005  (VT == MVT::v4f64 && Subtarget.hasQPX())) {
11006  if (RefinementSteps == ReciprocalEstimate::Unspecified)
11007  RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
11008 
11009  UseOneConstNR = true;
11010  return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
11011  }
11012  return SDValue();
11013 }
11014 
11015 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
11016  int Enabled,
11017  int &RefinementSteps) const {
11018  EVT VT = Operand.getValueType();
11019  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
11020  (VT == MVT::f64 && Subtarget.hasFRE()) ||
11021  (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
11022  (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
11023  (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
11024  (VT == MVT::v4f64 && Subtarget.hasQPX())) {
11025  if (RefinementSteps == ReciprocalEstimate::Unspecified)
11026  RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
11027  return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
11028  }
11029  return SDValue();
11030 }
11031 
11032 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
11033  // Note: This functionality is used only when unsafe-fp-math is enabled, and
11034  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
11035  // enabled for division), this functionality is redundant with the default
11036  // combiner logic (once the division -> reciprocal/multiply transformation
11037  // has taken place). As a result, this matters more for older cores than for
11038  // newer ones.
11039 
11040  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11041  // reciprocal if there are two or more FDIVs (for embedded cores with only
11042  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
11043  switch (Subtarget.getDarwinDirective()) {
11044  default:
11045  return 3;
11046  case PPC::DIR_440:
11047  case PPC::DIR_A2:
11048  case PPC::DIR_E500:
11049  case PPC::DIR_E500mc:
11050  case PPC::DIR_E5500:
11051  return 2;
11052  }
11053 }
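// Editorial example (illustrative only): with the thresholds above, the
// generic combiner rewrites a group of divisions such as
//   a/d; b/d; c/d   ==>   r = 1.0/d; a*r; b*r; c*r
// trading several divides for one reciprocal plus cheap multiplies, which
// only pays off once enough divides share the same divisor.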
11054 
11055 // isConsecutiveLSLoc needs to work even if all adds have not yet been
11056 // collapsed, and so we need to look through chains of them.
11057 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
11058  int64_t& Offset, SelectionDAG &DAG) {
11059  if (DAG.isBaseWithConstantOffset(Loc)) {
11060  Base = Loc.getOperand(0);
11061  Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
11062 
11063  // The base might itself be a base plus an offset, and if so, accumulate
11064  // that as well.
11065  getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
11066  }
11067 }
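// Editorial example: for a not-yet-collapsed chain such as
// (add (add X, 16), 8), the recursion above ends with Base = X and
// Offset = 24, so callers see a single base-plus-constant form.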
11068 
11069 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
11070  unsigned Bytes, int Dist,
11071  SelectionDAG &DAG) {
11072  if (VT.getSizeInBits() / 8 != Bytes)
11073  return false;
11074 
11075  SDValue BaseLoc = Base->getBasePtr();
11076  if (Loc.getOpcode() == ISD::FrameIndex) {
11077  if (BaseLoc.getOpcode() != ISD::FrameIndex)
11078  return false;
11079  const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11080  int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
11081  int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
11082  int FS = MFI.getObjectSize(FI);
11083  int BFS = MFI.getObjectSize(BFI);
11084  if (FS != BFS || FS != (int)Bytes) return false;
11085  return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
11086  }
11087 
11088  SDValue Base1 = Loc, Base2 = BaseLoc;
11089  int64_t Offset1 = 0, Offset2 = 0;
11090  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
11091  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
11092  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
11093  return true;
11094 
11095  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11096  const GlobalValue *GV1 = nullptr;
11097  const GlobalValue *GV2 = nullptr;
11098  Offset1 = 0;
11099  Offset2 = 0;
11100  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
11101  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
11102  if (isGA1 && isGA2 && GV1 == GV2)
11103  return Offset1 == (Offset2 + Dist*Bytes);
11104  return false;
11105 }
11106 
11107 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
11108 // not enforce equality of the chain operands.
11109 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
11110  unsigned Bytes, int Dist,
11111  SelectionDAG &DAG) {
11112  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
11113  EVT VT = LS->getMemoryVT();
11114  SDValue Loc = LS->getBasePtr();
11115  return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
11116  }
11117 
11118  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
11119  EVT VT;
11120  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11121  default: return false;
11122  case Intrinsic::ppc_qpx_qvlfd:
11123  case Intrinsic::ppc_qpx_qvlfda:
11124  VT = MVT::v4f64;
11125  break;
11126  case Intrinsic::ppc_qpx_qvlfs:
11127  case Intrinsic::ppc_qpx_qvlfsa:
11128  VT = MVT::v4f32;
11129  break;
11130  case Intrinsic::ppc_qpx_qvlfcd:
11131  case Intrinsic::ppc_qpx_qvlfcda:
11132  VT = MVT::v2f64;
11133  break;
11134  case Intrinsic::ppc_qpx_qvlfcs:
11135  case Intrinsic::ppc_qpx_qvlfcsa:
11136  VT = MVT::v2f32;
11137  break;
11138  case Intrinsic::ppc_qpx_qvlfiwa:
11139  case Intrinsic::ppc_qpx_qvlfiwz:
11140  case Intrinsic::ppc_altivec_lvx:
11141  case Intrinsic::ppc_altivec_lvxl:
11142  case Intrinsic::ppc_vsx_lxvw4x:
11143  case Intrinsic::ppc_vsx_lxvw4x_be:
11144  VT = MVT::v4i32;
11145  break;
11146  case Intrinsic::ppc_vsx_lxvd2x:
11147  case Intrinsic::ppc_vsx_lxvd2x_be:
11148  VT = MVT::v2f64;
11149  break;
11150  case Intrinsic::ppc_altivec_lvebx:
11151  VT = MVT::i8;
11152  break;
11153  case Intrinsic::ppc_altivec_lvehx:
11154  VT = MVT::i16;
11155  break;
11156  case Intrinsic::ppc_altivec_lvewx:
11157  VT = MVT::i32;
11158  break;
11159  }
11160 
11161  return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
11162  }
11163 
11164  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
11165  EVT VT;
11166  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11167  default: return false;
11168  case Intrinsic::ppc_qpx_qvstfd:
11169  case Intrinsic::ppc_qpx_qvstfda:
11170  VT = MVT::v4f64;
11171  break;
11172  case Intrinsic::ppc_qpx_qvstfs:
11173  case Intrinsic::ppc_qpx_qvstfsa:
11174  VT = MVT::v4f32;
11175  break;
11176  case Intrinsic::ppc_qpx_qvstfcd:
11177  case Intrinsic::ppc_qpx_qvstfcda:
11178  VT = MVT::v2f64;
11179  break;
11180  case Intrinsic::ppc_qpx_qvstfcs:
11181  case Intrinsic::ppc_qpx_qvstfcsa:
11182  VT = MVT::v2f32;
11183  break;
11184  case Intrinsic::ppc_qpx_qvstfiw:
11185  case Intrinsic::ppc_qpx_qvstfiwa:
11186  case Intrinsic::ppc_altivec_stvx:
11187  case Intrinsic::ppc_altivec_stvxl:
11188  case Intrinsic::ppc_vsx_stxvw4x:
11189  VT = MVT::v4i32;
11190  break;
11191  case Intrinsic::ppc_vsx_stxvd2x:
11192  VT = MVT::v2f64;
11193  break;
11194  case Intrinsic::ppc_vsx_stxvw4x_be:
11195  VT = MVT::v4i32;
11196  break;
11197  case Intrinsic::ppc_vsx_stxvd2x_be:
11198  VT = MVT::v2f64;
11199  break;
11200  case Intrinsic::ppc_altivec_stvebx:
11201  VT = MVT::i8;
11202  break;
11203  case Intrinsic::ppc_altivec_stvehx:
11204  VT = MVT::i16;
11205  break;
11206  case Intrinsic::ppc_altivec_stvewx:
11207  VT = MVT::i32;
11208  break;
11209  }
11210 
11211  return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
11212  }
11213 
11214  return false;
11215 }
11216 
11217 // Return true if there is a nearby consecutive load to the one provided
11218 // (regardless of alignment). We search up and down the chain, looking through
11219 // token factors and other loads (but nothing else). As a result, a true result
11220 // indicates that it is safe to create a new consecutive load adjacent to the
11221 // load provided.
11222 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
11223  SDValue Chain = LD->getChain();
11224  EVT VT = LD->getMemoryVT();
11225 
11226  SmallSet<SDNode *, 16> LoadRoots;
11227  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
11228  SmallSet<SDNode *, 16> Visited;
11229 
11230  // First, search up the chain, branching to follow all token-factor operands.
11231  // If we find a consecutive load, then we're done, otherwise, record all
11232  // nodes just above the top-level loads and token factors.
11233  while (!Queue.empty()) {
11234  SDNode *ChainNext = Queue.pop_back_val();
11235  if (!Visited.insert(ChainNext).second)
11236  continue;
11237 
11238  if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
11239  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
11240  return true;
11241 
11242  if (!Visited.count(ChainLD->getChain().getNode()))
11243  Queue.push_back(ChainLD->getChain().getNode());
11244  } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
11245  for (const SDUse &O : ChainNext->ops())
11246  if (!Visited.count(O.getNode()))
11247  Queue.push_back(O.getNode());
11248  } else
11249  LoadRoots.insert(ChainNext);
11250  }
11251 
11252  // Second, search down the chain, starting from the top-level nodes recorded
11253  // in the first phase. These top-level nodes are the nodes just above all
11254  // loads and token factors. Starting with their uses, recursively look through
11255  // all loads (just the chain uses) and token factors to find a consecutive
11256  // load.
11257  Visited.clear();
11258  Queue.clear();
11259 
11260  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
11261  IE = LoadRoots.end(); I != IE; ++I) {
11262  Queue.push_back(*I);
11263 
11264  while (!Queue.empty()) {
11265  SDNode *LoadRoot = Queue.pop_back_val();
11266  if (!Visited.insert(LoadRoot).second)
11267  continue;
11268 
11269  if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
11270  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
11271  return true;
11272 
11273  for (SDNode::use_iterator UI = LoadRoot->use_begin(),
11274  UE = LoadRoot->use_end(); UI != UE; ++UI)
11275  if (((isa<MemSDNode>(*UI) &&
11276  cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
11277  UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
11278  Queue.push_back(*UI);
11279  }
11280  }
11281 
11282  return false;
11283 }
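// Editorial note: the two loops above amount to a bounded bidirectional walk
// of the chain graph. The first walks upward through memory operations and
// TokenFactors, recording the nodes just above them; the second walks back
// down from those roots, so a consecutive neighbour is found whether it sits
// before or after LD in the chain ordering.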
11284 
11285 /// This function is called when we have proved that a SETCC node can be replaced
11286 /// by subtraction (and other supporting instructions) so that the result of
11287 /// comparison is kept in a GPR instead of CR. This function is purely for
11288 /// codegen purposes and has some flags to guide the codegen process.
11289 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
11290  bool Swap, SDLoc &DL, SelectionDAG &DAG) {
11291  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
11292 
11293  // Zero extend the operands to the largest legal integer. The original
11294  // operands must be of a strictly smaller size.
11295  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
11296  DAG.getConstant(Size, DL, MVT::i32));
11297  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
11298  DAG.getConstant(Size, DL, MVT::i32));
11299 
11300  // Swap if needed. Depends on the condition code.
11301  if (Swap)
11302  std::swap(Op0, Op1);
11303 
11304  // Subtract extended integers.
11305  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
11306 
11307  // Move the sign bit to the least significant position and zero out the rest.
11308  // Now the least significant bit carries the result of original comparison.
11309  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
11310  DAG.getConstant(Size - 1, DL, MVT::i32));
11311  auto Final = Shifted;
11312 
11313  // Complement the result if needed. Based on the condition code.
11314  if (Complement)
11315  Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
11316  DAG.getConstant(1, DL, MVT::i64));
11317 
11318  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
11319 }
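// Editorial example (illustrative only): for a 32-bit unsigned "setult a, b"
// on a 64-bit target, both operands are zero-extended to i64, so
// (zext a) - (zext b) is negative exactly when a < b; shifting that sign bit
// down to bit 0 yields the comparison result, and the Swap/Complement flags
// derive SETUGT, SETULE and SETUGE from the same subtraction.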
11320 
11321 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
11322  DAGCombinerInfo &DCI) const {
11323  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
11324 
11325  SelectionDAG &DAG = DCI.DAG;
11326  SDLoc DL(N);
11327 
11328  // Size of integers being compared has a critical role in the following
11329  // analysis, so we prefer to do this when all types are legal.
11330  if (!DCI.isAfterLegalizeDAG())
11331  return SDValue();
11332 
11333  // If all users of the SETCC extend its value to a legal integer type,
11334  // then we replace the SETCC with a subtraction.
11335  for (SDNode::use_iterator UI = N->use_begin(),
11336  UE = N->use_end(); UI != UE; ++UI) {
11337  if (UI->getOpcode() != ISD::ZERO_EXTEND)
11338  return SDValue();
11339  }
11340 
11341  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
11342  auto OpSize = N->getOperand(0).getValueSizeInBits();
11343 
11344  unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
11345 
11346  if (OpSize < Size) {
11347  switch (CC) {
11348  default: break;
11349  case ISD::SETULT:
11350  return generateEquivalentSub(N, Size, false, false, DL, DAG);
11351  case ISD::SETULE:
11352  return generateEquivalentSub(N, Size, true, true, DL, DAG);
11353  case ISD::SETUGT:
11354  return generateEquivalentSub(N, Size, false, true, DL, DAG);
11355  case ISD::SETUGE:
11356  return generateEquivalentSub(N, Size, true, false, DL, DAG);
11357  }
11358  }
11359 
11360  return SDValue();
11361 }
11362 
11363 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
11364  DAGCombinerInfo &DCI) const {
11365  SelectionDAG &DAG = DCI.DAG;
11366  SDLoc dl(N);
11367 
11368  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
11369  // If we're tracking CR bits, we need to be careful that we don't have:
11370  // trunc(binary-ops(zext(x), zext(y)))
11371  // or
11372  // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
11373  // such that we're unnecessarily moving things into GPRs when it would be
11374  // better to keep them in CR bits.
11375 
11376  // Note that trunc here can be an actual i1 trunc, or can be the effective
11377  // truncation that comes from a setcc or select_cc.
11378  if (N->getOpcode() == ISD::TRUNCATE &&
11379  N->getValueType(0) != MVT::i1)
11380  return SDValue();
11381 
11382  if (N->getOperand(0).getValueType() != MVT::i32 &&
11383  N->getOperand(0).getValueType() != MVT::i64)
11384  return SDValue();
11385 
11386  if (N->getOpcode() == ISD::SETCC ||
11387  N->getOpcode() == ISD::SELECT_CC) {
11388  // If we're looking at a comparison, then we need to make sure that the
11389  // high bits (all except for the first) don't affect the result.
11390  ISD::CondCode CC =
11391  cast<CondCodeSDNode>(N->getOperand(
11392  N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
11393  unsigned OpBits = N->getOperand(0).getValueSizeInBits();
11394 
11395  if (ISD::isSignedIntSetCC(CC)) {
11396  if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
11397  DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
11398  return SDValue();
11399  } else if (ISD::isUnsignedIntSetCC(CC)) {
11400  if (!DAG.MaskedValueIsZero(N->getOperand(0),
11401  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
11402  !DAG.MaskedValueIsZero(N->getOperand(1),
11403  APInt::getHighBitsSet(OpBits, OpBits-1)))
11404  return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
11405  : SDValue());
11406  } else {
11407  // This is neither a signed nor an unsigned comparison, just make sure
11408  // that the high bits are equal.
11409  KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
11410  KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
11411 
11412  // We don't really care about what is known about the first bit (if
11413  // anything), so clear it in all masks prior to comparing them.
11414  Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
11415  Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
11416 
11417  if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
11418  return SDValue();
11419  }
11420  }
11421 
11422  // We now know that the higher-order bits are irrelevant, we just need to
11423  // make sure that all of the intermediate operations are bit operations, and
11424  // all inputs are extensions.
11425  if (N->getOperand(0).getOpcode() != ISD::AND &&
11426  N->getOperand(0).getOpcode() != ISD::OR &&
11427  N->getOperand(0).getOpcode() != ISD::XOR &&
11428  N->getOperand(0).getOpcode() != ISD::SELECT &&
11429  N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
11430  N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
11431  N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
11432  N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
11433  N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
11434  return SDValue();
11435 
11436  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
11437  N->getOperand(1).getOpcode() != ISD::AND &&
11438  N->getOperand(1).getOpcode() != ISD::OR &&
11439  N->getOperand(1).getOpcode() != ISD::XOR &&
11440  N->getOperand(1).getOpcode() != ISD::SELECT &&
11441  N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
11442  N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
11443  N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
11444  N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
11445  N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
11446  return SDValue();
11447 
11448  SmallVector<SDValue, 4> Inputs;
11449  SmallVector<SDValue, 8> BinOps, PromOps;
11450  SmallPtrSet<SDNode *, 16> Visited;
11451 
11452  for (unsigned i = 0; i < 2; ++i) {
11453  if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
11454  N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
11455  N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
11456  N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
11457  isa<ConstantSDNode>(N->getOperand(i)))
11458  Inputs.push_back(N->getOperand(i));
11459  else
11460  BinOps.push_back(N->getOperand(i));
11461 
11462  if (N->getOpcode() == ISD::TRUNCATE)
11463  break;
11464  }
11465 
11466  // Visit all inputs, collect all binary operations (and, or, xor and
11467  // select) that are all fed by extensions.
11468  while (!BinOps.empty()) {
11469  SDValue BinOp = BinOps.back();
11470  BinOps.pop_back();
11471 
11472  if (!Visited.insert(BinOp.getNode()).second)
11473  continue;
11474 
11475  PromOps.push_back(BinOp);
11476 
11477  for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
11478  // The condition of the select is not promoted.
11479  if (BinOp.getOpcode() == ISD::SELECT && i == 0)
11480  continue;
11481  if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
11482  continue;
11483 
11484  if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
11485  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
11486  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
11487  BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
11488  isa<ConstantSDNode>(BinOp.getOperand(i))) {
11489  Inputs.push_back(BinOp.getOperand(i));
11490  } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
11491  BinOp.getOperand(i).getOpcode() == ISD::OR ||
11492  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
11493  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
11494  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
11495  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
11496  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
11497  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
11498  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
11499  BinOps.push_back(BinOp.getOperand(i));
11500  } else {
11501  // We have an input that is not an extension or another binary
11502  // operation; we'll abort this transformation.
11503  return SDValue();
11504  }
11505  }
11506  }
11507 
11508  // Make sure that this is a self-contained cluster of operations (which
11509  // is not quite the same thing as saying that everything has only one
11510  // use).
11511  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
11512  if (isa<ConstantSDNode>(Inputs[i]))
11513  continue;
11514 
11515  for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
11516  UE = Inputs[i].getNode()->use_end();
11517  UI != UE; ++UI) {
11518  SDNode *User = *UI;
11519  if (User != N && !Visited.count(User))
11520  return SDValue();
11521 
11522  // Make sure that we're not going to promote the non-output-value
11523  // operand(s) or SELECT or SELECT_CC.
11524  // FIXME: Although we could sometimes handle this, and it does occur in
11525  // practice that one of the condition inputs to the select is also one of
11526  // the outputs, we currently can't deal with this.
11527  if (User->getOpcode() == ISD::SELECT) {
11528  if (User->getOperand(0) == Inputs[i])
11529  return SDValue();
11530  } else if (User->getOpcode() == ISD::SELECT_CC) {
11531  if (User->getOperand(0) == Inputs[i] ||
11532  User->getOperand(1) == Inputs[i])
11533  return SDValue();
11534  }
11535  }
11536  }
11537 
11538  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
11539  for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
11540  UE = PromOps[i].getNode()->use_end();
11541  UI != UE; ++UI) {
11542  SDNode *User = *UI;
11543  if (User != N && !Visited.count(User))
11544  return SDValue();
11545 
11546  // Make sure that we're not going to promote the non-output-value
11547  // operand(s) or SELECT or SELECT_CC.
11548  // FIXME: Although we could sometimes handle this, and it does occur in
11549  // practice that one of the condition inputs to the select is also one of
11550  // the outputs, we currently can't deal with this.
11551  if (User->getOpcode() == ISD::SELECT) {
11552  if (User->getOperand(0) == PromOps[i])
11553  return SDValue();
11554  } else if (User->getOpcode() == ISD::SELECT_CC) {
11555  if (User->getOperand(0) == PromOps[i] ||
11556  User->getOperand(1) == PromOps[i])
11557  return SDValue();
11558  }
11559  }
11560  }
11561 
11562  // Replace all inputs with the extension operand.
11563  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
11564  // Constants may have users outside the cluster of to-be-promoted nodes,
11565  // and so we need to replace those as we do the promotions.
11566  if (isa<ConstantSDNode>(Inputs[i]))
11567  continue;
11568  else
11569  DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
11570  }
11571 
11572  std::list<HandleSDNode> PromOpHandles;
11573  for (auto &PromOp : PromOps)
11574  PromOpHandles.emplace_back(PromOp);
11575 
11576  // Replace all operations (these are all the same, but have a different
11577  // (i1) return type). DAG.getNode will validate that the types of
11578  // a binary operator match, so go through the list in reverse so that
11579  // we've likely promoted both operands first. Any intermediate truncations or
11580  // extensions disappear.
11581  while (!PromOpHandles.empty()) {
11582  SDValue PromOp = PromOpHandles.back().getValue();
11583  PromOpHandles.pop_back();
11584 
11585  if (PromOp.getOpcode() == ISD::TRUNCATE ||
11586  PromOp.getOpcode() == ISD::SIGN_EXTEND ||
11587  PromOp.getOpcode() == ISD::ZERO_EXTEND ||
11588  PromOp.getOpcode() == ISD::ANY_EXTEND) {
11589  if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
11590  PromOp.getOperand(0).getValueType() != MVT::i1) {
11591  // The operand is not yet ready (see comment below).
11592  PromOpHandles.emplace_front(PromOp);
11593  continue;
11594  }
11595 
11596  SDValue RepValue = PromOp.getOperand(0);
11597  if (isa<ConstantSDNode>(RepValue))
11598  RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
11599 
11600  DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
11601  continue;
11602  }
11603 
11604  unsigned C;
11605  switch (PromOp.getOpcode()) {
11606  default: C = 0; break;
11607  case ISD::SELECT: C = 1; break;
11608  case ISD::SELECT_CC: C = 2; break;
11609  }
11610 
11611  if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
11612  PromOp.getOperand(C).getValueType() != MVT::i1) ||
11613  (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
11614  PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
11615  // The to-be-promoted operands of this node have not yet been
11616  // promoted (this should be rare because we're going through the
11617  // list backward, but if one of the operands has several users in
11618  // this cluster of to-be-promoted nodes, it is possible).
11619  PromOpHandles.emplace_front(PromOp);
11620  continue;
11621  }
11622 
11623  SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
11624  PromOp.getNode()->op_end());
11625 
11626  // If there are any constant inputs, make sure they're replaced now.
11627  for (unsigned i = 0; i < 2; ++i)
11628  if (isa<ConstantSDNode>(Ops[C+i]))
11629  Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
11630 
11631  DAG.ReplaceAllUsesOfValueWith(PromOp,
11632  DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
11633  }
11634 
11635  // Now we're left with the initial truncation itself.
11636  if (N->getOpcode() == ISD::TRUNCATE)
11637  return N->getOperand(0);
11638 
11639  // Otherwise, this is a comparison. The operands to be compared have just
11640  // changed type (to i1), but everything else is the same.
11641  return SDValue(N, 0);
11642 }
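// Editorial example: given i1 values a and b, a pattern such as
//   (trunc (and (zext a), (zext b)))
// is rewritten by the routine above into (and a, b) computed directly on i1
// values, so the whole cluster of logic stays in CR bits instead of being
// shuttled through GPRs.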
11643 
11644 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
11645  DAGCombinerInfo &DCI) const {
11646  SelectionDAG &DAG = DCI.DAG;
11647  SDLoc dl(N);
11648 
11649  // If we're tracking CR bits, we need to be careful that we don't have:
11650  // zext(binary-ops(trunc(x), trunc(y)))
11651  // or
11652  // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
11653  // such that we're unnecessarily moving things into CR bits that can more
11654  // efficiently stay in GPRs. Note that if we're not certain that the high
11655  // bits are set as required by the final extension, we still may need to do
11656  // some masking to get the proper behavior.
11657 
11658  // This same functionality is important on PPC64 when dealing with
11659  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
11660  // the return values of functions. Because it is so similar, it is handled
11661  // here as well.
11662 
11663  if (N->getValueType(0) != MVT::i32 &&
11664  N->getValueType(0) != MVT::i64)
11665  return SDValue();
11666 
11667  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
11668  (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
11669  return SDValue();
11670 
11671  if (N->getOperand(0).getOpcode() != ISD::AND &&
11672  N->getOperand(0).getOpcode() != ISD::OR &&
11673  N->getOperand(0).getOpcode() != ISD::XOR &&
11674  N->getOperand(0).getOpcode() != ISD::SELECT &&
11675  N->getOperand(0).getOpcode() != ISD::SELECT_CC)
11676  return SDValue();
11677 
11678  SmallVector<SDValue, 4> Inputs;
11679  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
11680  SmallPtrSet<SDNode *, 16> Visited;
11681 
11682  // Visit all inputs, collect all binary operations (and, or, xor and
11683  // select) that are all fed by truncations.
11684  while (!BinOps.empty()) {
11685  SDValue BinOp = BinOps.back();
11686  BinOps.pop_back();
11687 
11688  if (!Visited.insert(BinOp.getNode()).second)
11689  continue;
11690 
11691  PromOps.push_back(BinOp);
11692 
11693  for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
11694  // The condition of the select is not promoted.
11695  if (BinOp.getOpcode() == ISD::SELECT && i == 0)
11696  continue;
11697  if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
11698  continue;
11699 
11700  if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
11701  isa<ConstantSDNode>(BinOp.getOperand(i))) {
11702  Inputs.push_back(BinOp.getOperand(i));
11703  } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
11704  BinOp.getOperand(i).getOpcode() == ISD::OR ||
11705  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
11706  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
11707  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
11708  BinOps.push_back(BinOp.getOperand(i));
11709  } else {
11710  // We have an input that is not a truncation or another binary
11711  // operation; we'll abort this transformation.
11712  return SDValue();
11713  }
11714  }
11715  }
11716 
11717  // The operands of a select that must be truncated when the select is
11718  // promoted because the operand is actually part of the to-be-promoted set.
11719  DenseMap<SDNode *, EVT> SelectTruncOp[2];
11720 
11721  // Make sure that this is a self-contained cluster of operations (which
11722  // is not quite the same thing as saying that everything has only one
11723  // use).
11724  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
11725  if (isa<ConstantSDNode>(Inputs[i]))
11726  continue;
11727 
11728  for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
11729  UE = Inputs[i].getNode()->use_end();
11730  UI != UE; ++UI) {
11731  SDNode *User = *UI;
11732  if (User != N && !Visited.count(User))
11733  return SDValue();
11734 
11735  // If we're going to promote the non-output-value operand(s) or SELECT or
11736  // SELECT_CC, record them for truncation.
11737  if (User->getOpcode() == ISD::SELECT) {
11738  if (User->getOperand(0) == Inputs[i])
11739  SelectTruncOp[0].insert(std::make_pair(User,
11740  User->getOperand(0).getValueType()));
11741  } else if (User->getOpcode() == ISD::SELECT_CC) {
11742  if (User->getOperand(0) == Inputs[i])
11743  SelectTruncOp[0].insert(std::make_pair(User,
11744  User->getOperand(0).getValueType()));
11745  if (User->getOperand(1) == Inputs[i])
11746  SelectTruncOp[1].insert(std::make_pair(User,
11747  User->getOperand(1).getValueType()));
11748  }
11749  }
11750  }
11751 
11752  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
11753  for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
11754  UE = PromOps[i].getNode()->use_end();
11755  UI != UE; ++UI) {
11756  SDNode *User = *UI;
11757  if (User != N && !Visited.count(User))
11758  return SDValue();
11759 
11760  // If we're going to promote the non-output-value operand(s) or SELECT or
11761  // SELECT_CC, record them for truncation.
11762  if (User->getOpcode() == ISD::SELECT) {
11763  if (User->getOperand(0) == PromOps[i])
11764  SelectTruncOp[0].insert(std::make_pair(User,
11765  User->getOperand(0).getValueType()));
11766  } else if (User->getOpcode() == ISD::SELECT_CC) {
11767  if (User->getOperand(0) == PromOps[i])
11768  SelectTruncOp[0].insert(std::make_pair(User,
11769  User->getOperand(0).getValueType()));
11770  if (User->getOperand(1) == PromOps[i])
11771  SelectTruncOp[1].insert(std::make_pair(User,
11772  User->getOperand(1).getValueType()));
11773  }
11774  }
11775  }
11776 
11777  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
11778  bool ReallyNeedsExt = false;
11779  if (N->getOpcode() != ISD::ANY_EXTEND) {
11780  // If all of the inputs are not already sign/zero extended, then
11781  // we'll still need to do that at the end.
11782  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
11783  if (isa<ConstantSDNode>(Inputs[i]))
11784  continue;
11785 
11786  unsigned OpBits =
11787  Inputs[i].getOperand(0).getValueSizeInBits();
11788  assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
11789 
11790  if ((N->getOpcode() == ISD::ZERO_EXTEND &&
11791  !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
11792  APInt::getHighBitsSet(OpBits,
11793  OpBits-PromBits))) ||
11794  (N->getOpcode() == ISD::SIGN_EXTEND &&
11795  DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
11796  (OpBits-(PromBits-1)))) {
11797  ReallyNeedsExt = true;
11798  break;
11799  }
11800  }
11801  }
11802 
11803  // Replace all inputs, either with the truncation operand, or a
11804  // truncation or extension to the final output type.
11805  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
11806  // Constant inputs need to be replaced with the to-be-promoted nodes that
11807  // use them because they might have users outside of the cluster of
11808  // promoted nodes.
11809  if (isa<ConstantSDNode>(Inputs[i]))
11810  continue;
11811 
11812  SDValue InSrc = Inputs[i].getOperand(0);
11813  if (Inputs[i].getValueType() == N->getValueType(0))
11814  DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
11815  else if (N->getOpcode() == ISD::SIGN_EXTEND)
11816  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
11817  DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
11818  else if (N->getOpcode() == ISD::ZERO_EXTEND)
11819  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
11820  DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
11821  else
11822  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
11823  DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
11824  }
11825 
11826  std::list<HandleSDNode> PromOpHandles;
11827  for (auto &PromOp : PromOps)
11828  PromOpHandles.emplace_back(PromOp);
11829 
11830  // Replace all operations (these are all the same, but have a different
11831  // (promoted) return type). DAG.getNode will validate that the types of
11832  // a binary operator match, so go through the list in reverse so that
11833  // we've likely promoted both operands first.
11834  while (!PromOpHandles.empty()) {
11835  SDValue PromOp = PromOpHandles.back().getValue();
11836  PromOpHandles.pop_back();
11837 
11838  unsigned C;
11839  switch (PromOp.getOpcode()) {
11840  default: C = 0; break;
11841  case ISD::SELECT: C = 1; break;
11842  case ISD::SELECT_CC: C = 2; break;
11843  }
11844 
11845  if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
11846  PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
11847  (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
11848  PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
11849  // The to-be-promoted operands of this node have not yet been
11850  // promoted (this should be rare because we're going through the
11851  // list backward, but if one of the operands has several users in
11852  // this cluster of to-be-promoted nodes, it is possible).
11853  PromOpHandles.emplace_front(PromOp);
11854  continue;
11855  }
11856 
11857  // For SELECT and SELECT_CC nodes, we do a similar check for any
11858  // to-be-promoted comparison inputs.
11859  if (PromOp.getOpcode() == ISD::SELECT ||
11860  PromOp.getOpcode() == ISD::SELECT_CC) {
11861  if ((SelectTruncOp[0].count(PromOp.getNode()) &&
11862  PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
11863  (SelectTruncOp[1].count(PromOp.getNode()) &&
11864  PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
11865  PromOpHandles.emplace_front(PromOp);
11866  continue;
11867  }
11868  }
11869 
11870  SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
11871  PromOp.getNode()->op_end());
11872 
11873  // If this node has constant inputs, then they'll need to be promoted here.
11874  for (unsigned i = 0; i < 2; ++i) {
11875  if (!isa<ConstantSDNode>(Ops[C+i]))
11876  continue;
11877  if (Ops[C+i].getValueType() == N->getValueType(0))
11878  continue;
11879 
11880  if (N->getOpcode() == ISD::SIGN_EXTEND)
11881  Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
11882  else if (N->getOpcode() == ISD::ZERO_EXTEND)
11883  Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
11884  else
11885  Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
11886  }
11887 
11888  // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
11889  // truncate them again to the original value type.
11890  if (PromOp.getOpcode() == ISD::SELECT ||
11891  PromOp.getOpcode() == ISD::SELECT_CC) {
11892  auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
11893  if (SI0 != SelectTruncOp[0].end())
11894  Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
11895  auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
11896  if (SI1 != SelectTruncOp[1].end())
11897  Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
11898  }
11899 
11900  DAG.ReplaceAllUsesOfValueWith(PromOp,
11901  DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
11902  }
11903 
11904  // Now we're left with the initial extension itself.
11905  if (!ReallyNeedsExt)
11906  return N->getOperand(0);
11907 
11908  // To zero extend, just mask off everything except for the first bit (in the
11909  // i1 case).
11910  if (N->getOpcode() == ISD::ZERO_EXTEND)
11911  return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
11912  DAG.getConstant(APInt::getLowBitsSet(
11913  N->getValueSizeInBits(0), PromBits),
11914  dl, N->getValueType(0)));
11915 
11916  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
11917  "Invalid extension type");
11918  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
11919  SDValue ShiftCst =
11920  DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
11921  return DAG.getNode(
11922  ISD::SRA, dl, N->getValueType(0),
11923  DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
11924  ShiftCst);
11925 }
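// Editorial note: the final shl/sra pair is the usual in-register
// sign-extension idiom; for example, extending a 32-bit quantity that
// already lives in a 64-bit register is (sra (shl x, 32), 32). It is only
// emitted when the inputs were not provably extended already
// (ReallyNeedsExt).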
11926 
11927 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
11928  DAGCombinerInfo &DCI) const {
11929  assert(N->getOpcode() == ISD::SETCC &&
11930  "Should be called with a SETCC node");
11931 
11932  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
11933  if (CC == ISD::SETNE || CC == ISD::SETEQ) {
11934  SDValue LHS = N->getOperand(0);
11935  SDValue RHS = N->getOperand(1);
11936 
11937  // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
11938  if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
11939  LHS.hasOneUse())
11940  std::swap(LHS, RHS);
11941 
11942  // x == 0-y --> x+y == 0
11943  // x != 0-y --> x+y != 0
11944  if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
11945  RHS.hasOneUse()) {
11946  SDLoc DL(N);
11947  SelectionDAG &DAG = DCI.DAG;
11948  EVT VT = N->getValueType(0);
11949  EVT OpVT = LHS.getValueType();
11950  SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
11951  return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
11952  }
11953  }
11954 
11955  return DAGCombineTruncBoolExt(N, DCI);
11956 }
11957 
11958 // Is this an extending load from an f32 to an f64?
11959 static bool isFPExtLoad(SDValue Op) {
11960  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
11961  return LD->getExtensionType() == ISD::EXTLOAD &&
11962  Op.getValueType() == MVT::f64;
11963  return false;
11964 }
11965 
11966 /// Reduces the number of fp-to-int conversions when building a vector.
11967 ///
11968 /// If this vector is built out of floating to integer conversions,
11969 /// transform it to a vector built out of floating point values followed by a
11970 /// single floating to integer conversion of the vector.
11971 /// Namely (build_vector (fptosi $A), (fptosi $B), ...)
11972 /// becomes (fptosi (build_vector ($A, $B, ...)))
11973 SDValue PPCTargetLowering::
11974 combineElementTruncationToVectorTruncation(SDNode *N,
11975  DAGCombinerInfo &DCI) const {
11976  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
11977  "Should be called with a BUILD_VECTOR node");
11978 
11979  SelectionDAG &DAG = DCI.DAG;
11980  SDLoc dl(N);
11981 
11982  SDValue FirstInput = N->getOperand(0);
11983  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
11984  "The input operand must be an fp-to-int conversion.");
11985 
11986  // This combine happens after legalization so the fp_to_[su]i nodes are
11987  // already converted to PPCISD nodes.
11988  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
11989  if (FirstConversion == PPCISD::FCTIDZ ||
11990  FirstConversion == PPCISD::FCTIDUZ ||
11991  FirstConversion == PPCISD::FCTIWZ ||
11992  FirstConversion == PPCISD::FCTIWUZ) {
11993  bool IsSplat = true;
11994  bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
11995  FirstConversion == PPCISD::FCTIWUZ;
11996  EVT SrcVT = FirstInput.getOperand(0).getValueType();
11997  SmallVector<SDValue, 4> Ops;
11998  EVT TargetVT = N->getValueType(0);
11999  for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
12000  SDValue NextOp = N->getOperand(i);
12001  if (NextOp.getOpcode() != PPCISD::MFVSR)
12002  return SDValue();
12003  unsigned NextConversion = NextOp.getOperand(0).getOpcode();
12004  if (NextConversion != FirstConversion)
12005  return SDValue();
12006  // If we are converting to 32-bit integers, we need to add an FP_ROUND.
12007  // This is not valid if the input was originally double precision. It is
12008  // also not profitable to do unless this is an extending load in which
12009  // case doing this combine will allow us to combine consecutive loads.
12010  if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
12011  return SDValue();
12012  if (N->getOperand(i) != FirstInput)
12013  IsSplat = false;
12014  }
12015 
12016  // If this is a splat, we leave it as-is since there will be only a single
12017  // fp-to-int conversion followed by a splat of the integer. This is better
12018  // for 32-bit and smaller ints and neutral for 64-bit ints.
12019  if (IsSplat)
12020  return SDValue();
12021 
12022  // Now that we know we have the right type of node, get its operands
12023  for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
12024  SDValue In = N->getOperand(i).getOperand(0);
12025  if (Is32Bit) {
12026  // For 32-bit values, we need to add an FP_ROUND node (if we made it
12027  // here, we know that all inputs are extending loads so this is safe).
12028  if (In.isUndef())
12029  Ops.push_back(DAG.getUNDEF(SrcVT));
12030  else {
12031  SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
12032  MVT::f32, In.getOperand(0),
12033  DAG.getIntPtrConstant(1, dl));
12034  Ops.push_back(Trunc);
12035  }
12036  } else
12037  Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
12038  }
12039 
12040  unsigned Opcode;
12041  if (FirstConversion == PPCISD::FCTIDZ ||
12042  FirstConversion == PPCISD::FCTIWZ)
12043  Opcode = ISD::FP_TO_SINT;
12044  else
12045  Opcode = ISD::FP_TO_UINT;
12046 
12047  EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
12048  SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
12049  return DAG.getNode(Opcode, dl, TargetVT, BV);
12050  }
12051  return SDValue();
12052 }
12053 
12054 /// Reduce the number of loads when building a vector.
12055 ///
12056 /// Building a vector out of multiple loads can be converted to a load
12057 /// of the vector type if the loads are consecutive. If the loads are
12058 /// consecutive but in descending order, a shuffle is added at the end
12059 /// to reorder the vector.
12060 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
12061  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
12062  "Should be called with a BUILD_VECTOR node");
12063 
12064  SDLoc dl(N);
12065  bool InputsAreConsecutiveLoads = true;
12066  bool InputsAreReverseConsecutive = true;
12067  unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8;
12068  SDValue FirstInput = N->getOperand(0);
12069  bool IsRoundOfExtLoad = false;
12070 
12071  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
12072  FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
12073  LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
12074  IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
12075  }
12076  // Not a build vector of (possibly fp_rounded) loads.
12077  if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
12078  N->getNumOperands() == 1)
12079  return SDValue();
12080 
12081  for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
12082  // If any inputs are fp_round(extload), they all must be.
12083  if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
12084  return SDValue();
12085 
12086  SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
12087  N->getOperand(i);
12088  if (NextInput.getOpcode() != ISD::LOAD)
12089  return SDValue();
12090 
12091  SDValue PreviousInput =
12092  IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
12093  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
12094  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
12095 
12096  // If any inputs are fp_round(extload), they all must be.
12097  if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
12098  return SDValue();
12099 
12100  if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
12101  InputsAreConsecutiveLoads = false;
12102  if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
12103  InputsAreReverseConsecutive = false;
12104 
12105  // Exit early if the loads are neither consecutive nor reverse consecutive.
12106  if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
12107  return SDValue();
12108  }
12109 
12110  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
12111  "The loads cannot be both consecutive and reverse consecutive.");
12112 
12113  SDValue FirstLoadOp =
12114  IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
12115  SDValue LastLoadOp =
12116  IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
12117  N->getOperand(N->getNumOperands()-1);
12118 
12119  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
12120  LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
12121  if (InputsAreConsecutiveLoads) {
12122  assert(LD1 && "Input needs to be a LoadSDNode.");
12123  return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
12124  LD1->getBasePtr(), LD1->getPointerInfo(),
12125  LD1->getAlignment());
12126  }
12127  if (InputsAreReverseConsecutive) {
12128  assert(LDL && "Input needs to be a LoadSDNode.");
12129  SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
12130  LDL->getBasePtr(), LDL->getPointerInfo(),
12131  LDL->getAlignment());
12132  SmallVector<int, 16> Ops;
12133  for (int i = N->getNumOperands() - 1; i >= 0; i--)
12134  Ops.push_back(i);
12135 
12136  return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
12137  DAG.getUNDEF(N->getValueType(0)), Ops);
12138  }
12139  return SDValue();
12140 }
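// Editorial example (illustrative only): four f32 loads from X, X+4, X+8 and
// X+12 feeding a v4f32 build_vector collapse into a single v4f32 load from X.
// If the same loads appear in descending address order, the vector is loaded
// once and a reversing vector_shuffle (mask <3,2,1,0>) restores the requested
// element order.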
12141 
12142 // This function adds the required vector_shuffle needed to get
12143 // the elements of the vector extract in the correct position
12144 // as specified by the CorrectElems encoding.
12145 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
12146  SDValue Input, uint64_t Elems,
12147  uint64_t CorrectElems) {
12148  SDLoc dl(N);
12149 
12150  unsigned NumElems = Input.getValueType().getVectorNumElements();
12151  SmallVector<int, 16> ShuffleMask(NumElems, -1);
12152 
12153  // Knowing the element indices being extracted from the original
12154  // vector and the order in which they're being inserted, just put
12155  // them at element indices required for the instruction.
12156  for (unsigned i = 0; i < N->getNumOperands(); i++) {
12157  if (DAG.getDataLayout().isLittleEndian())
12158  ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
12159  else
12160  ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
12161  CorrectElems = CorrectElems >> 8;
12162  Elems = Elems >> 8;
12163  }
12164 
12165  SDValue Shuffle =
12166  DAG.getVectorShuffle(Input.getValueType(), dl, Input,
12167  DAG.getUNDEF(Input.getValueType()), ShuffleMask);
12168 
12169  EVT Ty = N->getValueType(0);
12170  SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
12171  return BV;
12172 }
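// Editorial example: for a little-endian byte-to-doubleword extend, the
// encoding 0x000070F8 expects extracts of elements 0 and 8. If the
// build_vector instead extracted elements 1 and 9, the shuffle built above
// places element 1 in lane 0 and element 9 in lane 8, so the vextsb2d-style
// instruction selected for SExtVElems sees the layout it requires.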
12173 
12174 // Look for build vector patterns where input operands come from sign
12175 // extended vector_extract elements of specific indices. If the correct indices
12176 // aren't used, add a vector shuffle to fix up the indices and create a new
12177 // PPCISD::SExtVElems node which selects the vector sign extend instructions
12178 // during instruction selection.
12179 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
12180  // This array encodes the indices that the vector sign extend instructions
12181  // extract from when extending from one type to another for both BE and LE.
12182  // The right nibble of each byte corresponds to the LE indices,
12183  // and the left nibble of each byte corresponds to the BE indices.
12184  // For example: 0x3074B8FC byte->word
12185  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
12186  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
12187  // For example: 0x000070F8 byte->double word
12188  // For LE: the allowed indices are: 0x0,0x8
12189  // For BE: the allowed indices are: 0x7,0xF
12190  uint64_t TargetElems[] = {
12191  0x3074B8FC, // b->w
12192  0x000070F8, // b->d
12193  0x10325476, // h->w
12194  0x00003074, // h->d
12195  0x00001032, // w->d
12196  };
12197 
12198  uint64_t Elems = 0;
12199  int Index;
12200  SDValue Input;
12201 
12202  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
12203  if (!Op)
12204  return false;
12205  if (Op.getOpcode() != ISD::SIGN_EXTEND &&
12206  Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
12207  return false;
12208 
12209  // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
12210  // of the right width.
12211  SDValue Extract = Op.getOperand(0);
12212  if (Extract.getOpcode() == ISD::ANY_EXTEND)
12213  Extract = Extract.getOperand(0);
12214  if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12215  return false;
12216 
12217  ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
12218  if (!ExtOp)
12219  return false;
12220 
12221  Index = ExtOp->getZExtValue();
12222  if (Input && Input != Extract.getOperand(0))
12223  return false;
12224 
12225  if (!Input)
12226  Input = Extract.getOperand(0);
12227 
12228  Elems = Elems << 8;
12229  Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
12230  Elems |= Index;
12231 
12232  return true;
12233  };
12234 
12235  // If the build vector operands aren't sign-extended vector extracts
12236  // of the same input vector, then return.
12237  for (unsigned i = 0; i < N->getNumOperands(); i++) {
12238  if (!isSExtOfVecExtract(N->getOperand(i))) {
12239  return SDValue();
12240  }
12241  }
12242 
12243  // If the vector extract indices are not correct, add the appropriate
12244  // vector_shuffle.
12245  int TgtElemArrayIdx;
12246  int InputSize = Input.getValueType().getScalarSizeInBits();
12247  int OutputSize = N->getValueType(0).getScalarSizeInBits();
12248  if (InputSize + OutputSize == 40)
12249  TgtElemArrayIdx = 0;
12250  else if (InputSize + OutputSize == 72)
12251  TgtElemArrayIdx = 1;
12252  else if (InputSize + OutputSize == 48)
12253  TgtElemArrayIdx = 2;
12254  else if (InputSize + OutputSize == 80)
12255  TgtElemArrayIdx = 3;
12256  else if (InputSize + OutputSize == 96)
12257  TgtElemArrayIdx = 4;
12258  else
12259  return SDValue();
12260 
12261  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
12262  CorrectElems = DAG.getDataLayout().isLittleEndian()
12263  ? CorrectElems & 0x0F0F0F0F0F0F0F0F
12264  : CorrectElems & 0xF0F0F0F0F0F0F0F0;
12265  if (Elems != CorrectElems) {
12266  return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
12267  }
12268 
12269  // Regular lowering will catch cases where a shuffle is not needed.
12270  return SDValue();
12271 }
12272 
12273 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
12274  DAGCombinerInfo &DCI) const {
12275  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
12276  "Should be called with a BUILD_VECTOR node");
12277 
12278  SelectionDAG &DAG = DCI.DAG;
12279  SDLoc dl(N);
12280 
12281  if (!Subtarget.hasVSX())
12282  return SDValue();
12283 
12284  // The target independent DAG combiner will leave a build_vector of
12285  // float-to-int conversions intact. We can generate MUCH better code for
12286  // a float-to-int conversion of a vector of floats.
12287  SDValue FirstInput = N->getOperand(0);
12288  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
12289  SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
12290  if (Reduced)
12291  return Reduced;
12292  }
12293 
12294  // If we're building a vector out of consecutive loads, just load that
12295  // vector type.
12296  SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
12297  if (Reduced)
12298  return Reduced;
12299 
12300  // If we're building a vector out of extended elements from another vector
12301  // we have P9 vector integer extend instructions. The code assumes legal
12302  // input types (i.e. it can't handle things like v4i16) so do not run before
12303  // legalization.
12304  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
12305  Reduced = combineBVOfVecSExt(N, DAG);
12306  if (Reduced)
12307  return Reduced;
12308  }
12309 
12310 
12311  if (N->getValueType(0) != MVT::v2f64)
12312  return SDValue();
12313 
12314  // Looking for:
12315  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
12316  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
12317  FirstInput.getOpcode() != ISD::UINT_TO_FP)
12318  return SDValue();
12319  if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
12320  N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
12321  return SDValue();
12322  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
12323  return SDValue();
12324 
12325  SDValue Ext1 = FirstInput.getOperand(0);
12326  SDValue Ext2 = N->getOperand(1).getOperand(0);
12327  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12328  Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12329  return SDValue();
12330 
12331  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
12332  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
12333  if (!Ext1Op || !Ext2Op)
12334  return SDValue();
12335  if (Ext1.getValueType() != MVT::i32 ||
12336  Ext2.getValueType() != MVT::i32)
12337  if (Ext1.getOperand(0) != Ext2.getOperand(0))
12338  return SDValue();
12339 
12340  int FirstElem = Ext1Op->getZExtValue();
12341  int SecondElem = Ext2Op->getZExtValue();
12342  int SubvecIdx;
12343  if (FirstElem == 0 && SecondElem == 1)
12344  SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
12345  else if (FirstElem == 2 && SecondElem == 3)
12346  SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
12347  else
12348  return SDValue();
12349 
12350  SDValue SrcVec = Ext1.getOperand(0);
12351  auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
12352  PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
12353  return DAG.getNode(NodeType, dl, MVT::v2f64,
12354  SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
12355 }
12356 
12357 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
12358  DAGCombinerInfo &DCI) const {
12359  assert((N->getOpcode() == ISD::SINT_TO_FP ||
12360  N->getOpcode() == ISD::UINT_TO_FP) &&
12361  "Need an int -> FP conversion node here");
12362 
12363  if (useSoftFloat() || !Subtarget.has64BitSupport())
12364  return SDValue();
12365 
12366  SelectionDAG &DAG = DCI.DAG;
12367  SDLoc dl(N);
12368  SDValue Op(N, 0);
12369 
12370  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
12371  // from the hardware.
12372  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
12373  return SDValue();
12374  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
12375  Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
12376  return SDValue();
12377 
12378  SDValue FirstOperand(Op.getOperand(0));
12379  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
12380  (FirstOperand.getValueType() == MVT::i8 ||
12381  FirstOperand.getValueType() == MVT::i16);
12382  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
12383  bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
12384  bool DstDouble = Op.getValueType() == MVT::f64;
12385  unsigned ConvOp = Signed ?
12386  (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
12387  (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
12388  SDValue WidthConst =
12389  DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
12390  dl, false);
12391  LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
12392  SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
12393  SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
12394  DAG.getVTList(MVT::f64, MVT::Other),
12395  Ops, MVT::i8, LDN->getMemOperand());
12396 
12397  // For signed conversion, we need to sign-extend the value in the VSR
12398  if (Signed) {
12399  SDValue ExtOps[] = { Ld, WidthConst };
12400  SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
12401  return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
12402  } else
12403  return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
12404  }
12405 
12406 
12407  // For i32 intermediate values, unfortunately, the conversion functions
12408  // leave the upper 32 bits of the value undefined. Within the set of
12409  // scalar instructions, we have no method for zero- or sign-extending the
12410  // value. Thus, we cannot handle i32 intermediate values here.
12411  if (Op.getOperand(0).getValueType() == MVT::i32)
12412  return SDValue();
12413 
12414  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
12415  "UINT_TO_FP is supported only with FPCVT");
12416 
12417  // If we have FCFIDS, then use it when converting to single-precision.
12418  // Otherwise, convert to double-precision and then round.
12419  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
12420  ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
12421  : PPCISD::FCFIDS)
12422  : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
12423  : PPCISD::FCFID);
12424  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
12425  ? MVT::f32
12426  : MVT::f64;
12427 
12428  // If we're converting from a float to an int and back to a float again,
12429  // then we don't need the store/load pair at all.
12430  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
12431  Subtarget.hasFPCVT()) ||
12432  (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
12433  SDValue Src = Op.getOperand(0).getOperand(0);
12434  if (Src.getValueType() == MVT::f32) {
12435  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
12436  DCI.AddToWorklist(Src.getNode());
12437  } else if (Src.getValueType() != MVT::f64) {
12438  // Make sure that we don't pick up a ppc_fp128 source value.
12439  return SDValue();
12440  }
12441 
12442  unsigned FCTOp =
12443  Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
12444  PPCISD::FCTIDUZ;
12445 
12446  SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
12447  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
12448 
12449  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
12450  FP = DAG.getNode(ISD::FP_ROUND, dl,
12451  MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
12452  DCI.AddToWorklist(FP.getNode());
12453  }
12454 
12455  return FP;
12456  }
12457 
12458  return SDValue();
12459 }
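// Editorial example: a round trip such as (sint_to_fp (fp_to_sint f)) for an
// f64 value is handled above without the historical store/load sequence; the
// value is converted with fctidz and brought straight back with fcfid, plus
// an fp_round when the final type is f32 and FPCVT is unavailable.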
12460 
12461 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
12462 // builtins) into loads with swaps.
12464  DAGCombinerInfo &DCI) const {
12465  SelectionDAG &DAG = DCI.DAG;
12466  SDLoc dl(N);
12467  SDValue Chain;
12468  SDValue Base;
12469  MachineMemOperand *MMO;
12470 
12471  switch (N->getOpcode()) {
12472  default:
12473  llvm_unreachable("Unexpected opcode for little endian VSX load");
12474  case ISD::LOAD: {
12475  LoadSDNode *LD = cast<LoadSDNode>(N);
12476  Chain = LD->getChain();
12477  Base = LD->getBasePtr();
12478  MMO = LD->getMemOperand();
12479  // If the MMO suggests this isn't a load of a full vector, leave
12480  // things alone. For a built-in, we have to make the change for
12481  // correctness, so if there is a size problem that will be a bug.
12482  if (MMO->getSize() < 16)
12483  return SDValue();
12484  break;
12485  }
12486  case ISD::INTRINSIC_W_CHAIN: {
12487  MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
12488  Chain = Intrin->getChain();
12489  // Similarly to the store case below, Intrin->getBasePtr() doesn't get
12490  // us what we want. Get operand 2 instead.
12491  Base = Intrin->getOperand(2);
12492  MMO = Intrin->getMemOperand();
12493  break;
12494  }
12495  }
12496 
12497  MVT VecTy = N->getValueType(0).getSimpleVT();
12498 
12499  // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
12500  // aligned and the type is a vector with elements up to 4 bytes
12501  if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
12502  && VecTy.getScalarSizeInBits() <= 32 ) {
12503  return SDValue();
12504  }
12505 
12506  SDValue LoadOps[] = { Chain, Base };
12507  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
12508  DAG.getVTList(MVT::v2f64, MVT::Other),
12509  LoadOps, MVT::v2f64, MMO);
12510 
12511  DCI.AddToWorklist(Load.getNode());
12512  Chain = Load.getValue(1);
12513  SDValue Swap = DAG.getNode(
12514  PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
12515  DCI.AddToWorklist(Swap.getNode());
12516 
12517  // Add a bitcast if the resulting load type doesn't match v2f64.
12518  if (VecTy != MVT::v2f64) {
12519  SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
12520  DCI.AddToWorklist(N.getNode());
12521  // Package {bitcast value, swap's chain} to match Load's shape.
12522  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
12523  N, Swap.getValue(1));
12524  }
12525 
12526  return Swap;
12527 }
12528 
12529 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
12530 // builtins) into stores with swaps.
12531 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
12532  DAGCombinerInfo &DCI) const {
12533  SelectionDAG &DAG = DCI.DAG;
12534  SDLoc dl(N);
12535  SDValue Chain;
12536  SDValue Base;
12537  unsigned SrcOpnd;
12538  MachineMemOperand *MMO;
12539 
12540  switch (N->getOpcode()) {
12541  default:
12542  llvm_unreachable("Unexpected opcode for little endian VSX store");
12543  case ISD::STORE: {
12544  StoreSDNode *ST = cast<StoreSDNode>(N);
12545  Chain = ST->getChain();
12546  Base = ST->getBasePtr();
12547  MMO = ST->getMemOperand();
12548  SrcOpnd = 1;
12549  // If the MMO suggests this isn't a store of a full vector, leave
12550  // things alone. For a built-in, we have to make the change for
12551  // correctness, so if there is a size problem that will be a bug.
12552  if (MMO->getSize() < 16)
12553  return SDValue();
12554  break;
12555  }
12556  case ISD::INTRINSIC_VOID: {
12557  MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
12558  Chain = Intrin->getChain();
12559  // Intrin->getBasePtr() oddly does not get what we want.
12560  Base = Intrin->getOperand(3);
12561  MMO = Intrin->getMemOperand();
12562  SrcOpnd = 2;
12563  break;
12564  }
12565  }
12566 
12567  SDValue Src = N->getOperand(SrcOpnd);
12568  MVT VecTy = Src.getValueType().getSimpleVT();
12569 
12570  // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
12571  // aligned and the type is a vector with elements up to 4 bytes
12572  if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
12573  && VecTy.getScalarSizeInBits() <= 32 ) {
12574  return SDValue();
12575  }
12576 
12577  // All stores are done as v2f64 and possible bit cast.
12578  if (VecTy != MVT::v2f64) {
12579  Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
12580  DCI.AddToWorklist(Src.getNode());
12581  }
12582 
12583  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
12584  DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
12585  DCI.AddToWorklist(Swap.getNode());
12586  Chain = Swap.getValue(1);
12587  SDValue StoreOps[] = { Chain, Swap, Base };
12588  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
12589  DAG.getVTList(MVT::Other),
12590  StoreOps, VecTy, MMO);
12591  DCI.AddToWorklist(Store.getNode());
12592  return Store;
12593 }
12594 
12595 // Handle DAG combine for STORE (FP_TO_INT F).
12596 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
12597  DAGCombinerInfo &DCI) const {
12598 
12599  SelectionDAG &DAG = DCI.DAG;
12600  SDLoc dl(N);
12601  unsigned Opcode = N->getOperand(1).getOpcode();
12602 
12603  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
12604  && "Not a FP_TO_INT Instruction!");
12605 
12606  SDValue Val = N->getOperand(1).getOperand(0);
12607  EVT Op1VT = N->getOperand(1).getValueType();
12608  EVT ResVT = Val.getValueType();
12609 
12610  // Floating point types smaller than 32 bits are not legal on Power.
12611  if (ResVT.getScalarSizeInBits() < 32)
12612  return SDValue();
12613 
12614  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
12615  bool ValidTypeForStoreFltAsInt =
12616  (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
12617  (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
12618 
12619  if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Altivec() ||
12620  cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
12621  return SDValue();
12622 
12623  // Extend f32 values to f64
12624  if (ResVT.getScalarSizeInBits() == 32) {
12625  Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
12626  DCI.AddToWorklist(Val.getNode());
12627  }
12628 
12629  // Set signed or unsigned conversion opcode.
12630  unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
12631  PPCISD::FP_TO_SINT_IN_VSR :
12632  PPCISD::FP_TO_UINT_IN_VSR;
12633 
12634  Val = DAG.getNode(ConvOpcode,
12635  dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
12636  DCI.AddToWorklist(Val.getNode());
12637 
12638  // Set number of bytes being converted.
12639  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
12640  SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
12641  DAG.getIntPtrConstant(ByteSize, dl, false),
12642  DAG.getValueType(Op1VT) };
12643 
12643 
12644  Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
12645  DAG.getVTList(MVT::Other), Ops,
12646  cast<StoreSDNode>(N)->getMemoryVT(),
12647  cast<StoreSDNode>(N)->getMemOperand());
12648 
12649  DCI.AddToWorklist(Val.getNode());
12650  return Val;
12651 }
12652 
12653 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
12654  DAGCombinerInfo &DCI) const {
12655  SelectionDAG &DAG = DCI.DAG;
12656  SDLoc dl(N);
12657  switch (N->getOpcode()) {
12658  default: break;
12659  case ISD::ADD:
12660  return combineADD(N, DCI);
12661  case ISD::SHL:
12662  return combineSHL(N, DCI);
12663  case ISD::SRA:
12664  return combineSRA(N, DCI);
12665  case ISD::SRL:
12666  return combineSRL(N, DCI);
12667  case PPCISD::SHL:
12668  if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
12669  return N->getOperand(0);
12670  break;
12671  case PPCISD::SRL:
12672  if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
12673  return N->getOperand(0);
12674  break;
12675  case PPCISD::SRA:
12676  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
12677  if (C->isNullValue() || // 0 >>s V -> 0.
12678  C->isAllOnesValue()) // -1 >>s V -> -1.
12679  return N->getOperand(0);
12680  }
12681  break;
12682  case ISD::SIGN_EXTEND:
12683  case ISD::ZERO_EXTEND:
12684  case ISD::ANY_EXTEND:
12685  return DAGCombineExtBoolTrunc(N, DCI);
12686  case ISD::TRUNCATE:
12687  return combineTRUNCATE(N, DCI);
12688  case ISD::SETCC:
12689  if (SDValue CSCC = combineSetCC(N, DCI))
12690  return CSCC;
12691  LLVM_FALLTHROUGH;
12692  case ISD::SELECT_CC:
12693  return DAGCombineTruncBoolExt(N, DCI);
12694  case ISD::SINT_TO_FP:
12695  case ISD::UINT_TO_FP:
12696  return combineFPToIntToFP(N, DCI);
12697  case ISD::STORE: {
12698 
12699  EVT Op1VT = N->getOperand(1).getValueType();
12700  unsigned Opcode = N->getOperand(1).getOpcode();
12701 
12702  if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
12703  SDValue Val= combineStoreFPToInt(N, DCI);
12704  if (Val)
12705  return Val;
12706  }
12707 
12708  // Turn STORE (BSWAP) -> sthbrx/stwbrx.
12709  if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
12710  N->getOperand(1).getNode()->hasOneUse() &&
12711  (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
12712  (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
12713 
12714  // STBRX can only handle simple types and it makes no sense to store
12715  // fewer than two bytes in byte-reversed order.
12716  EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
12717  if (mVT.isExtended() || mVT.getSizeInBits() < 16)
12718  break;
12719 
12720  SDValue BSwapOp = N->getOperand(1).getOperand(0);
12721  // Do an any-extend to 32-bits if this is a half-word input.
12722  if (BSwapOp.getValueType() == MVT::i16)
12723  BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
12724 
12725  // If the type of the BSWAP operand is wider than the stored memory width,
12726  // it needs to be shifted right before the STBRX.
12727  if (Op1VT.bitsGT(mVT)) {
12728  int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
12729  BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
12730  DAG.getConstant(Shift, dl, MVT::i32));
12731  // Need to truncate if this is a bswap of i64 stored as i32/i16.
12732  if (Op1VT == MVT::i64)
12733  BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
12734  }
12735 
12736  SDValue Ops[] = {
12737  N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
12738  };
12739  return
12740  DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
12741  Ops, cast<StoreSDNode>(N)->getMemoryVT(),
12742  cast<StoreSDNode>(N)->getMemOperand());
12743  }
12744 
12745  // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
12746  // So it can increase the chance of CSE constant construction.
12747  if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
12748  isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
12749  // Need to sign-extend to 64 bits to handle negative values.
12750  EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
12751  uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
12752  MemVT.getSizeInBits());
12753  SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
12754 
12755  // DAG.getTruncStore() can't be used here because it doesn't accept
12756  // the general (base + offset) addressing mode.
12757  // So we use UpdateNodeOperands and setTruncatingStore instead.
12758  DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
12759  N->getOperand(3));
12760  cast<StoreSDNode>(N)->setTruncatingStore(true);
12761  return SDValue(N, 0);
12762  }
12763 
12764  // For little endian, VSX stores require generating xxswapd/stxvd2x.
12765  // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
12766  if (Op1VT.isSimple()) {
12767  MVT StoreVT = Op1VT.getSimpleVT();
12768  if (Subtarget.needsSwapsForVSXMemOps() &&
12769  (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
12770  StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
12771  return expandVSXStoreForLE(N, DCI);
12772  }
12773  break;
12774  }
12775  case ISD::LOAD: {
12776  LoadSDNode *LD = cast<LoadSDNode>(N);
12777  EVT VT = LD->getValueType(0);
12778 
12779  // For little endian, VSX loads require generating lxvd2x/xxswapd.
12780  // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
12781  if (VT.isSimple()) {
12782  MVT LoadVT = VT.getSimpleVT();
12783  if (Subtarget.needsSwapsForVSXMemOps() &&
12784  (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
12785  LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
12786  return expandVSXLoadForLE(N, DCI);
12787  }
12788 
12789  // We sometimes end up with a 64-bit integer load, from which we extract
12790  // two single-precision floating-point numbers. This happens with
12791  // std::complex<float>, and other similar structures, because of the way we
12792  // canonicalize structure copies. However, if we lack direct moves,
12793  // then the final bitcasts from the extracted integer values to the
12794  // floating-point numbers turn into store/load pairs. Even with direct moves,
12795  // just loading the two floating-point numbers is likely better.
12796  auto ReplaceTwoFloatLoad = [&]() {
12797  if (VT != MVT::i64)
12798  return false;
12799 
12800  if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
12801  LD->isVolatile())
12802  return false;
12803 
12804  // We're looking for a sequence like this:
12805  // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
12806  // t16: i64 = srl t13, Constant:i32<32>
12807  // t17: i32 = truncate t16
12808  // t18: f32 = bitcast t17
12809  // t19: i32 = truncate t13
12810  // t20: f32 = bitcast t19
12811 
12812  if (!LD->hasNUsesOfValue(2, 0))
12813  return false;
12814 
12815  auto UI = LD->use_begin();
12816  while (UI.getUse().getResNo() != 0) ++UI;
12817  SDNode *Trunc = *UI++;
12818  while (UI.getUse().getResNo() != 0) ++UI;
12819  SDNode *RightShift = *UI;
12820  if (Trunc->getOpcode() != ISD::TRUNCATE)
12821  std::swap(Trunc, RightShift);
12822 
12823  if (Trunc->getOpcode() != ISD::TRUNCATE ||
12824  Trunc->getValueType(0) != MVT::i32 ||
12825  !Trunc->hasOneUse())
12826  return false;
12827  if (RightShift->getOpcode() != ISD::SRL ||
12828  !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
12829  RightShift->getConstantOperandVal(1) != 32 ||
12830  !RightShift->hasOneUse())
12831  return false;
12832 
12833  SDNode *Trunc2 = *RightShift->use_begin();
12834  if (Trunc2->getOpcode() != ISD::TRUNCATE ||
12835  Trunc2->getValueType(0) != MVT::i32 ||
12836  !Trunc2->hasOneUse())
12837  return false;
12838 
12839  SDNode *Bitcast = *Trunc->use_begin();
12840  SDNode *Bitcast2 = *Trunc2->use_begin();
12841 
12842  if (Bitcast->getOpcode() != ISD::BITCAST ||
12843  Bitcast->getValueType(0) != MVT::f32)
12844  return false;
12845  if (Bitcast2->getOpcode() != ISD::BITCAST ||
12846  Bitcast2->getValueType(0) != MVT::f32)
12847  return false;
12848 
12849  if (Subtarget.isLittleEndian())
12850  std::swap(Bitcast, Bitcast2);
12851 
12852  // Bitcast has the second float (in memory-layout order) and Bitcast2
12853  // has the first one.
12854 
12855  SDValue BasePtr = LD->getBasePtr();
12856  if (LD->isIndexed()) {
12857  assert(LD->getAddressingMode() == ISD::PRE_INC &&
12858  "Non-pre-inc AM on PPC?");
12859  BasePtr =
12860  DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12861  LD->getOffset());
12862  }
12863 
12864  auto MMOFlags =
12865  LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
12866  SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
12867  LD->getPointerInfo(), LD->getAlignment(),
12868  MMOFlags, LD->getAAInfo());
12869  SDValue AddPtr =
12870  DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
12871  BasePtr, DAG.getIntPtrConstant(4, dl));
12872  SDValue FloatLoad2 = DAG.getLoad(
12873  MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
12874  LD->getPointerInfo().getWithOffset(4),
12875  MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
12876 
12877  if (LD->isIndexed()) {
12878  // Note that DAGCombine should re-form any pre-increment load(s) from
12879  // what is produced here if that makes sense.
12880  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
12881  }
12882 
12883  DCI.CombineTo(Bitcast2, FloatLoad);
12884  DCI.CombineTo(Bitcast, FloatLoad2);
12885 
12886  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
12887  SDValue(FloatLoad2.getNode(), 1));
12888  return true;
12889  };
12890 
12891  if (ReplaceTwoFloatLoad())
12892  return SDValue(N, 0);
12893 
12894  EVT MemVT = LD->getMemoryVT();
12895  Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
12896  unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
12897  Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
12898  unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
12899  if (LD->isUnindexed() && VT.isVector() &&
12900  ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
12901  // P8 and later hardware should just use LOAD.
12902  !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
12903  VT == MVT::v4i32 || VT == MVT::v4f32)) ||
12904  (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
12905  LD->getAlignment() >= ScalarABIAlignment)) &&
12906  LD->getAlignment() < ABIAlignment) {
12907  // This is a type-legal unaligned Altivec or QPX load.
12908  SDValue Chain = LD->getChain();
12909  SDValue Ptr = LD->getBasePtr();
12910  bool isLittleEndian = Subtarget.isLittleEndian();
12911 
12912  // This implements the loading of unaligned vectors as described in
12913  // the venerable Apple Velocity Engine overview. Specifically:
12914  // https://developer.apple.com/hardwaredrivers/ve/alignment.html
12915  // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
12916  //
12917  // The general idea is to expand a sequence of one or more unaligned
12918  // loads into an alignment-based permutation-control instruction (lvsl
12919  // or lvsr), a series of regular vector loads (which always truncate
12920  // their input address to an aligned address), and a series of
12921  // permutations. The results of these permutations are the requested
12922  // loaded values. The trick is that the last "extra" load is not taken
12923  // from the address you might suspect (sizeof(vector) bytes after the
12924  // last requested load), but rather sizeof(vector) - 1 bytes after the
12925  // last requested vector. The point of this is to avoid a page fault if
12926  // the base address happened to be aligned. This works because if the
12927  // base address is aligned, then adding less than a full vector length
12928  // will cause the last vector in the sequence to be (re)loaded.
12929  // Otherwise, the next vector will be fetched as you might suspect was
12930  // necessary.
12931 
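 // Editorial sketch (not part of the original source): for a single unaligned
 // 16-byte Altivec load from pointer P, the big-endian expansion described
 // above is roughly:
 //   permcntl = lvsl(P)          ; permute control derived from P's low bits
 //   lo       = lvx(P)           ; aligned block containing P
 //   hi       = lvx(P + 15)      ; next aligned block, or the same block again
 //                               ;   if P was already 16-byte aligned
 //   result   = vperm(lo, hi, permcntl)
 // Little-endian code uses lvsr and swaps the vperm inputs, as handled below.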
12932  // We might be able to reuse the permutation generation from
12933  // a different base address offset from this one by an aligned amount.
12934  // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
12935  // optimization later.
12936  Intrinsic::ID Intr, IntrLD, IntrPerm;
12937  MVT PermCntlTy, PermTy, LDTy;
12938  if (Subtarget.hasAltivec()) {
12939  Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
12940  Intrinsic::ppc_altivec_lvsl;
12941  IntrLD = Intrinsic::ppc_altivec_lvx;
12942  IntrPerm = Intrinsic::ppc_altivec_vperm;
12943  PermCntlTy = MVT::v16i8;
12944  PermTy = MVT::v4i32;
12945  LDTy = MVT::v4i32;
12946  } else {
12947  Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
12948  Intrinsic::ppc_qpx_qvlpcls;
12949  IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
12950  Intrinsic::ppc_qpx_qvlfs;
12951  IntrPerm = Intrinsic::ppc_qpx_qvfperm;
12952  PermCntlTy = MVT::v4f64;
12953  PermTy = MVT::v4f64;
12954  LDTy = MemVT.getSimpleVT();
12955  }
12956 
12957  SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
12958 
12959  // Create the new MMO for the new base load. It is like the original MMO,
12960  // but represents an area in memory almost twice the vector size centered
12961  // on the original address. If the address is unaligned, we might start
12962  // reading up to (sizeof(vector)-1) bytes below the address of the
12963  // original unaligned load.
12964  MachineFunction &MF = DAG.getMachineFunction();
12965  MachineMemOperand *BaseMMO =
12966  MF.getMachineMemOperand(LD->getMemOperand(),
12967  -(long)MemVT.getStoreSize()+1,
12968  2*MemVT.getStoreSize()-1);
12969 
12970  // Create the new base load.
12971  SDValue LDXIntID =
12972  DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
12973  SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
12974  SDValue BaseLoad =
12975  DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
12976  DAG.getVTList(PermTy, MVT::Other),
12977  BaseLoadOps, LDTy, BaseMMO);
12978 
12979  // Note that the value of IncOffset (which is provided to the next
12980  // load's pointer info offset value, and thus used to calculate the
12981  // alignment), and the value of IncValue (which is actually used to
12982  // increment the pointer value) are different! This is because we
12983  // require the next load to appear to be aligned, even though it
12984  // is actually offset from the base pointer by a lesser amount.
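 // Editorial example (illustrative only): with a 16-byte vector, IncOffset is
 // 16, so the extra load's pointer info reports the aligned offset Base+16;
 // but when no other load exists at that address, IncValue drops to 15 and the
 // extra load actually reads from Base+15, which lvx truncates back into the
 // same aligned region, avoiding a possible fault on the following page.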
12985  int IncOffset = VT.getSizeInBits() / 8;
12986  int IncValue = IncOffset;
12987 
12988  // Walk (both up and down) the chain looking for another load at the real
12989  // (aligned) offset (the alignment of the other load does not matter in
12990  // this case). If found, then do not use the offset reduction trick, as
12991  // that will prevent the loads from being later combined (as they would
12992  // otherwise be duplicates).
12993  if (!findConsecutiveLoad(LD, DAG))
12994  --IncValue;
12995 
12996  SDValue Increment =
12997  DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
12998  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
12999 
13000  MachineMemOperand *ExtraMMO =
13001  MF.getMachineMemOperand(LD->getMemOperand(),
13002  1, 2*MemVT.getStoreSize()-1);
13003  SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
13004  SDValue ExtraLoad =
13005  DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
13006  DAG.getVTList(PermTy, MVT::Other),
13007  ExtraLoadOps, LDTy, ExtraMMO);
13008 
13009  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
13010  BaseLoad.getValue(1), ExtraLoad.getValue(1));
13011 
13012  // Because vperm has a big-endian bias, we must reverse the order
13013  // of the input vectors and complement the permute control vector
13014  // when generating little endian code. We have already handled the
13015  // latter by using lvsr instead of lvsl, so just reverse BaseLoad
13016  // and ExtraLoad here.
13017  SDValue Perm;
13018  if (isLittleEndian)
13019  Perm = BuildIntrinsicOp(IntrPerm,
13020  ExtraLoad, BaseLoad, PermCntl, DAG, dl);
13021  else
13022  Perm = BuildIntrinsicOp(IntrPerm,
13023  BaseLoad, ExtraLoad, PermCntl, DAG, dl);
13024 
13025  if (VT != PermTy)
13026  Perm = Subtarget.hasAltivec() ?
13027  DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
13028  DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
13029  DAG.getTargetConstant(1, dl, MVT::i64));
13030  // second argument is 1 because this rounding
13031  // is always exact.
13032 
13033  // The output of the permutation is our loaded result, the TokenFactor is
13034  // our new chain.
13035  DCI.CombineTo(N, Perm, TF);
13036  return SDValue(N, 0);
13037  }
13038  }
13039  break;
13040  case ISD::INTRINSIC_WO_CHAIN: {
13041  bool isLittleEndian = Subtarget.isLittleEndian();
13042  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
13043  Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
13044  : Intrinsic::ppc_altivec_lvsl);
13045  if ((IID == Intr ||
13046  IID == Intrinsic::ppc_qpx_qvlpcld ||
13047  IID == Intrinsic::ppc_qpx_qvlpcls) &&
13048  N->getOperand(1)->getOpcode() == ISD::ADD) {
13049  SDValue Add = N->getOperand(1);
13050 
13051  int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
13052  5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
13053 
13054  if (DAG.MaskedValueIsZero(Add->getOperand(1),
13055  APInt::getAllOnesValue(Bits /* alignment */)
13056  .zext(Add.getScalarValueSizeInBits()))) {
13057  SDNode *BasePtr = Add->getOperand(0).getNode();
13058  for (SDNode::use_iterator UI = BasePtr->use_begin(),
13059  UE = BasePtr->use_end();
13060  UI != UE; ++UI) {
13061  if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
13062  cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
13063  // We've found another LVSL/LVSR, and this address is an aligned
13064  // multiple of that one. The results will be the same, so use the
13065  // one we've just found instead.
13066 
13067  return SDValue(*UI, 0);
13068  }
13069  }
13070  }
13071 
13072  if (isa<ConstantSDNode>(Add->getOperand(1))) {
13073  SDNode *BasePtr = Add->getOperand(0).getNode();
13074  for (SDNode::use_iterator UI = BasePtr->use_begin(),
13075  UE = BasePtr->use_end(); UI != UE; ++UI) {
13076  if (UI->getOpcode() == ISD::ADD &&
13077  isa<ConstantSDNode>(UI->getOperand(1)) &&
13078  (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
13079  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
13080  (1ULL << Bits) == 0) {
13081  SDNode *OtherAdd = *UI;
13082  for (SDNode::use_iterator VI = OtherAdd->use_begin(),
13083  VE = OtherAdd->use_end(); VI != VE; ++VI) {
13084  if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
13085  cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
13086  return SDValue(*VI, 0);
13087  }
13088  }
13089  }
13090  }
13091  }
13092  }
13093 
13094  // Combine vmaxsw/h/b(a, a's negation) to abs(a)
13095  // Expose the vabsduw/h/b opportunity for downstream combines
13096  if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
13097  (IID == Intrinsic::ppc_altivec_vmaxsw ||
13098  IID == Intrinsic::ppc_altivec_vmaxsh ||
13099  IID == Intrinsic::ppc_altivec_vmaxsb)) {
13100  SDValue V1 = N->getOperand(1);
13101  SDValue V2 = N->getOperand(2);
13102  if ((V1.getSimpleValueType() == MVT::v4i32 ||
13103  V1.getSimpleValueType() == MVT::v8i16 ||
13104  V1.getSimpleValueType() == MVT::v16i8) &&
13105  V1.getSimpleValueType() == V2.getSimpleValueType()) {
13106  // (0-a, a)
13107  if (V1.getOpcode() == ISD::SUB &&
13108  ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
13109  V1.getOperand(1) == V2) {
13110  return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
13111  }
13112  // (a, 0-a)
13113  if (V2.getOpcode() == ISD::SUB &&
13114  ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
13115  V2.getOperand(1) == V1) {
13116  return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
13117  }
13118  // (x-y, y-x)
13119  if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
13120  V1.getOperand(0) == V2.getOperand(1) &&
13121  V1.getOperand(1) == V2.getOperand(0)) {
13122  return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
13123  }
13124  }
13125  }
13126  }
13127 
13128  break;
13129  case ISD::INTRINSIC_W_CHAIN:
13130  // For little endian, VSX loads require generating lxvd2x/xxswapd.
13131  // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
13132  if (Subtarget.needsSwapsForVSXMemOps()) {
13133  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13134  default:
13135  break;
13136  case Intrinsic::ppc_vsx_lxvw4x:
13137  case Intrinsic::ppc_vsx_lxvd2x:
13138  return expandVSXLoadForLE(N, DCI);
13139  }
13140  }
13141  break;
13142  case ISD::INTRINSIC_VOID:
13143  // For little endian, VSX stores require generating xxswapd/stxvd2x.
13144  // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
13145  if (Subtarget.needsSwapsForVSXMemOps()) {
13146  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13147  default:
13148  break;
13149  case Intrinsic::ppc_vsx_stxvw4x:
13150  case Intrinsic::ppc_vsx_stxvd2x:
13151  return expandVSXStoreForLE(N, DCI);
13152  }
13153  }
13154  break;
13155  case ISD::BSWAP:
13156  // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
13157  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
13158  N->getOperand(0).hasOneUse() &&
13159  (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
13160  (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
13161  N->getValueType(0) == MVT::i64))) {
13162  SDValue Load = N->getOperand(0);
13163  LoadSDNode *LD = cast<LoadSDNode>(Load);
13164  // Create the byte-swapping load.
13165  SDValue Ops[] = {
13166  LD->getChain(), // Chain
13167  LD->getBasePtr(), // Ptr
13168  DAG.getValueType(N->getValueType(0)) // VT
13169  };
13170  SDValue BSLoad =
13171  DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
13172  DAG.getVTList(N->getValueType(0) == MVT::i64 ?
13173  MVT::i64 : MVT::i32, MVT::Other),
13174  Ops, LD->getMemoryVT(), LD->getMemOperand());
13175 
13176  // If this is an i16 load, insert the truncate.
13177  SDValue ResVal = BSLoad;
13178  if (N->getValueType(0) == MVT::i16)
13179  ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
13180 
13181  // First, combine the bswap away. This makes the value produced by the
13182  // load dead.
13183  DCI.CombineTo(N, ResVal);
13184 
13185  // Next, combine the load away, we give it a bogus result value but a real
13186  // chain result. The result value is dead because the bswap is dead.
13187  DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
13188 
13189  // Return N so it doesn't get rechecked!
13190  return SDValue(N, 0);
13191  }
13192  break;
13193  case PPCISD::VCMP:
13194  // If a VCMPo node already exists with exactly the same operands as this
13195  // node, use its result instead of this node (VCMPo computes both a CR6 and
13196  // a normal output).
13197  //
13198  if (!N->getOperand(0).hasOneUse() &&
13199  !N->getOperand(1).hasOneUse() &&
13200  !N->getOperand(2).hasOneUse()) {
13201 
13202  // Scan all of the users of the LHS, looking for VCMPo's that match.
13203  SDNode *VCMPoNode = nullptr;
13204 
13205  SDNode *LHSN = N->getOperand(0).getNode();
13206  for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
13207  UI != E; ++UI)
13208  if (UI->getOpcode() == PPCISD::VCMPo &&
13209  UI->getOperand(1) == N->getOperand(1) &&
13210  UI->getOperand(2) == N->getOperand(2) &&
13211  UI->getOperand(0) == N->getOperand(0)) {
13212  VCMPoNode = *UI;
13213  break;
13214  }
13215 
13216  // If there is no VCMPo node, or if the flag value has a single use, don't
13217  // transform this.
13218  if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
13219  break;
13220 
13221  // Look at the (necessarily single) use of the flag value. If it has a
13222  // chain, this transformation is more complex. Note that multiple things
13223  // could use the value result, which we should ignore.
13224  SDNode *FlagUser = nullptr;
13225  for (SDNode::use_iterator UI = VCMPoNode->use_begin();
13226  FlagUser == nullptr; ++UI) {
13227  assert(UI != VCMPoNode->use_end() && "Didn't find user!");
13228  SDNode *User = *UI;
13229  for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
13230  if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
13231  FlagUser = User;
13232  break;
13233  }
13234  }
13235  }
13236 
13237  // If the user is a MFOCRF instruction, we know this is safe.
13238  // Otherwise we give up for right now.
13239  if (FlagUser->getOpcode() == PPCISD::MFOCRF)
13240  return SDValue(VCMPoNode, 0);
13241  }
13242  break;
13243  case ISD::BRCOND: {
13244  SDValue Cond = N->getOperand(1);
13245  SDValue Target = N->getOperand(2);
13246 
13247  if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
13248  cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
13249  Intrinsic::ppc_is_decremented_ctr_nonzero) {
13250 
13251  // We now need to make the intrinsic dead (it cannot be instruction
13252  // selected).
13253  DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
13254  assert(Cond.getNode()->hasOneUse() &&
13255  "Counter decrement has more than one use");
13256 
13257  return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
13258  N->getOperand(0), Target);
13259  }
13260  }
13261  break;
13262  case ISD::BR_CC: {
13263  // If this is a branch on an altivec predicate comparison, lower this so
13264  // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
13265  // lowering is done pre-legalize, because the legalizer lowers the predicate
13266  // compare down to code that is difficult to reassemble.
13267  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
13268  SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
13269 
13270  // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
13271  // value. If so, pass-through the AND to get to the intrinsic.
13272  if (LHS.getOpcode() == ISD::AND &&
13273  LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
13274  cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
13275  Intrinsic::ppc_is_decremented_ctr_nonzero &&
13276  isa<ConstantSDNode>(LHS.getOperand(1)) &&
13277  !isNullConstant(LHS.getOperand(1)))
13278  LHS = LHS.getOperand(0);
13279 
13280  if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
13281  cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
13282  Intrinsic::ppc_is_decremented_ctr_nonzero &&
13283  isa<ConstantSDNode>(RHS)) {
13284  assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
13285  "Counter decrement comparison is not EQ or NE");
13286 
13287  unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
13288  bool isBDNZ = (CC == ISD::SETEQ && Val) ||
13289  (CC == ISD::SETNE && !Val);
13290 
13291  // We now need to make the intrinsic dead (it cannot be instruction
13292  // selected).
13293  DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
13294  assert(LHS.getNode()->hasOneUse() &&
13295  "Counter decrement has more than one use");
13296 
13297  return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
13298  N->getOperand(0), N->getOperand(4));
13299  }
13300 
13301  int CompareOpc;
13302  bool isDot;
13303 
13304  if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
13305  isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
13306  getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
13307  assert(isDot && "Can't compare against a vector result!");
13308 
13309  // If this is a comparison against something other than 0/1, then we know
13310  // that the condition is never/always true.
13311  unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
13312  if (Val != 0 && Val != 1) {
13313  if (CC == ISD::SETEQ) // Cond never true, remove branch.
13314  return N->getOperand(0);
13315  // Always !=, turn it into an unconditional branch.
13316  return DAG.getNode(ISD::BR, dl, MVT::Other,
13317  N->getOperand(0), N->getOperand(4));
13318  }
13319 
13320  bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
13321 
13322  // Create the PPCISD altivec 'dot' comparison node.
13323  SDValue Ops[] = {
13324  LHS.getOperand(2), // LHS of compare
13325  LHS.getOperand(3), // RHS of compare
13326  DAG.getConstant(CompareOpc, dl, MVT::i32)
13327  };
13328  EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
13329  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
13330 
13331  // Unpack the result based on how the target uses it.
13332  PPC::Predicate CompOpc;
13333  switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
13334  default: // Can't happen, don't crash on invalid number though.
13335  case 0: // Branch on the value of the EQ bit of CR6.
13336  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
13337  break;
13338  case 1: // Branch on the inverted value of the EQ bit of CR6.
13339  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
13340  break;
13341  case 2: // Branch on the value of the LT bit of CR6.
13342  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
13343  break;
13344  case 3: // Branch on the inverted value of the LT bit of CR6.
13345  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
13346  break;
13347  }
13348 
13349  return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
13350  DAG.getConstant(CompOpc, dl, MVT::i32),
13351  DAG.getRegister(PPC::CR6, MVT::i32),
13352  N->getOperand(4), CompNode.getValue(1));
13353  }
13354  break;
13355  }
13356  case ISD::BUILD_VECTOR:
13357  return DAGCombineBuildVector(N, DCI);
13358  case ISD::ABS:
13359  return combineABS(N, DCI);
13360  case ISD::VSELECT:
13361  return combineVSelect(N, DCI);
13362  }
13363 
13364  return SDValue();
13365 }
13366 
13367 SDValue
13368 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
13369  SelectionDAG &DAG,
13370  SmallVectorImpl<SDNode *> &Created) const {
13371  // fold (sdiv X, pow2)
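 // Editorial example (illustrative only): for sdiv %x, 4 on a 32-bit value,
 // the node built below corresponds to the classic PPC sequence
 //   srawi r3, r3, 2   ; arithmetic shift; CA is set if a negative value had
 //                     ;   nonzero bits shifted out
 //   addze r3, r3      ; add the carry back in to round toward zero
 // which PPCISD::SRA_ADDZE models as a single target node.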
13372  EVT VT = N->getValueType(0);
13373  if (VT == MVT::i64 && !Subtarget.isPPC64())
13374  return SDValue();
13375  if ((VT != MVT::i32 && VT != MVT::i64) ||
13376  !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
13377  return SDValue();
13378 
13379  SDLoc DL(N);
13380  SDValue N0 = N->getOperand(0);
13381 
13382  bool IsNegPow2 = (-Divisor).isPowerOf2();
13383  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
13384  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
13385 
13386  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
13387  Created.push_back(Op.getNode());
13388 
13389  if (IsNegPow2) {
13390  Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
13391  Created.push_back(Op.getNode());
13392  }
13393 
13394  return Op;
13395 }
13396 
13397 //===----------------------------------------------------------------------===//
13398 // Inline Assembly Support
13399 //===----------------------------------------------------------------------===//
13400 
13401 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
13402  KnownBits &Known,
13403  const APInt &DemandedElts,
13404  const SelectionDAG &DAG,
13405  unsigned Depth) const {
13406  Known.resetAll();
13407  switch (Op.getOpcode()) {
13408  default: break;
13409  case PPCISD::LBRX: {
13410  // lhbrx is known to have the top bits cleared out.
13411  if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
13412  Known.Zero = 0xFFFF0000;
13413  break;
13414  }
13415  case ISD::INTRINSIC_WO_CHAIN: {
13416  switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
13417  default: break;
13418  case Intrinsic::ppc_altivec_vcmpbfp_p:
13419  case Intrinsic::ppc_altivec_vcmpeqfp_p:
13420  case Intrinsic::ppc_altivec_vcmpequb_p:
13421  case Intrinsic::ppc_altivec_vcmpequh_p:
13422  case Intrinsic::ppc_altivec_vcmpequw_p:
13423  case Intrinsic::ppc_altivec_vcmpequd_p:
13424  case Intrinsic::ppc_altivec_vcmpgefp_p:
13425  case Intrinsic::ppc_altivec_vcmpgtfp_p:
13426  case Intrinsic::ppc_altivec_vcmpgtsb_p:
13427  case Intrinsic::ppc_altivec_vcmpgtsh_p:
13428  case Intrinsic::ppc_altivec_vcmpgtsw_p:
13429  case Intrinsic::ppc_altivec_vcmpgtsd_p:
13430  case Intrinsic::ppc_altivec_vcmpgtub_p:
13431  case Intrinsic::ppc_altivec_vcmpgtuh_p:
13432  case Intrinsic::ppc_altivec_vcmpgtuw_p:
13433  case Intrinsic::ppc_altivec_vcmpgtud_p:
13434  Known.Zero = ~1U; // All bits but the low one are known to be zero.
13435  break;
13436  }
13437  }
13438  }
13439 }
13440 
13441 unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
13442  switch (Subtarget.getDarwinDirective()) {
13443  default: break;
13444  case PPC::DIR_970:
13445  case PPC::DIR_PWR4:
13446  case PPC::DIR_PWR5:
13447  case PPC::DIR_PWR5X:
13448  case PPC::DIR_PWR6:
13449  case PPC::DIR_PWR6X:
13450  case PPC::DIR_PWR7:
13451  case PPC::DIR_PWR8:
13452  case PPC::DIR_PWR9: {
13453  if (!ML)
13454  break;
13455 
13456  const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13457 
13458  // For small loops (between 5 and 8 instructions), align to a 32-byte
13459  // boundary so that the entire loop fits in one instruction-cache line.
13460  uint64_t LoopSize = 0;
13461  for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
13462  for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
13463  LoopSize += TII->getInstSizeInBytes(*J);
13464  if (LoopSize > 32)
13465  break;
13466  }
13467 
13468  if (LoopSize > 16 && LoopSize <= 32)
13469  return 5;
13470 
13471  break;
13472  }
13473  }
13474 
13475  return TargetLowering::getPrefLoopAlignment(ML);
13476 }
13477 
13478 /// getConstraintType - Given a constraint, return the type of
13479 /// constraint it is for this target.
13480 PPCTargetLowering::ConstraintType
13481 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
13482  if (Constraint.size() == 1) {
13483  switch (Constraint[0]) {
13484  default: break;
13485  case 'b':
13486  case 'r':
13487  case 'f':
13488  case 'd':
13489  case 'v':
13490  case 'y':
13491  return C_RegisterClass;
13492  case 'Z':
13493  // FIXME: While Z does indicate a memory constraint, it specifically
13494  // indicates an r+r address (used in conjunction with the 'y' modifier
13495  // in the replacement string). Currently, we're forcing the base
13496  // register to be r0 in the asm printer (which is interpreted as zero)
13497  // and forming the complete address in the second register. This is
13498  // suboptimal.
13499  return C_Memory;
13500  }
13501  } else if (Constraint == "wc") { // individual CR bits.
13502  return C_RegisterClass;
13503  } else if (Constraint == "wa" || Constraint == "wd" ||
13504  Constraint == "wf" || Constraint == "ws" ||
13505  Constraint == "wi") {
13506  return C_RegisterClass; // VSX registers.
13507  }
13508  return TargetLowering::getConstraintType(Constraint);
13509 }
13510 
13511 /// Examine constraint type and operand type and determine a weight value.
13512 /// This object must already have been set up with the operand type
13513 /// and the current alternative constraint selected.
13514 TargetLowering::ConstraintWeight
13515 PPCTargetLowering::getSingleConstraintMatchWeight(
13516  AsmOperandInfo &info, const char *constraint) const {
13517  ConstraintWeight weight = CW_Invalid;
13518  Value *CallOperandVal = info.CallOperandVal;
13519  // If we don't have a value, we can't do a match,
13520  // but allow it at the lowest weight.
13521  if (!CallOperandVal)
13522  return CW_Default;
13523  Type *type = CallOperandVal->getType();
13524 
13525  // Look at the constraint type.
13526  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
13527  return CW_Register; // an individual CR bit.
13528  else if ((StringRef(constraint) == "wa" ||
13529  StringRef(constraint) == "wd" ||
13530  StringRef(constraint) == "wf") &&
13531  type->isVectorTy())
13532  return CW_Register;
13533  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
13534  return CW_Register;
13535  else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
13536  return CW_Register; // VSX register that just holds 64-bit integer data.
13537 
13538  switch (*constraint) {
13539  default:
13540  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
13541  break;
13542  case 'b':
13543  if (type->isIntegerTy())
13544  weight = CW_Register;
13545  break;
13546  case 'f':
13547  if (type->isFloatTy())
13548  weight = CW_Register;
13549  break;
13550  case 'd':
13551  if (type->isDoubleTy())
13552  weight = CW_Register;
13553  break;
13554  case 'v':
13555  if (type->isVectorTy())
13556  weight = CW_Register;
13557  break;
13558  case 'y':
13559  weight = CW_Register;
13560  break;
13561  case 'Z':
13562  weight = CW_Memory;
13563  break;
13564  }
13565  return weight;
13566 }
13567 
13568 std::pair<unsigned, const TargetRegisterClass *>
13569 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
13570  StringRef Constraint,
13571  MVT VT) const {
13572  if (Constraint.size() == 1) {
13573  // GCC RS6000 Constraint Letters
13574  switch (Constraint[0]) {
13575  case 'b': // R1-R31
13576  if (VT == MVT::i64 && Subtarget.isPPC64())
13577  return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
13578  return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
13579  case 'r': // R0-R31
13580  if (VT == MVT::i64 && Subtarget.isPPC64())
13581  return std::make_pair(0U, &PPC::G8RCRegClass);
13582  return std::make_pair(0U, &PPC::GPRCRegClass);
13583  // 'd' and 'f' constraints are both defined to be "the floating point
13584  // registers", where one is for 32-bit and the other for 64-bit. We don't
13585  // really care overly much here so just give them all the same reg classes.
13586  case 'd':
13587  case 'f':
13588  if (Subtarget.hasSPE()) {
13589  if (VT == MVT::f32 || VT == MVT::i32)
13590  return std::make_pair(0U, &PPC::SPE4RCRegClass);
13591  if (VT == MVT::f64 || VT == MVT::i64)
13592  return std::make_pair(0U, &PPC::SPERCRegClass);
13593  } else {
13594  if (VT == MVT::f32 || VT == MVT::i32)
13595  return std::make_pair(0U, &PPC::F4RCRegClass);
13596  if (VT == MVT::f64 || VT == MVT::i64)
13597  return std::make_pair(0U, &PPC::F8RCRegClass);
13598  if (VT == MVT::v4f64 && Subtarget.hasQPX())
13599  return std::make_pair(0U, &PPC::QFRCRegClass);
13600  if (VT == MVT::v4f32 && Subtarget.hasQPX())
13601  return std::make_pair(0U, &PPC::QSRCRegClass);
13602  }
13603  break;
13604  case 'v':
13605  if (VT == MVT::v4f64 && Subtarget.hasQPX())
13606  return std::make_pair(0U, &PPC::QFRCRegClass);
13607  if (VT == MVT::v4f32 && Subtarget.hasQPX())
13608  return std::make_pair(0U, &PPC::QSRCRegClass);
13609  if (Subtarget.hasAltivec())
13610  return std::make_pair(0U, &PPC::VRRCRegClass);
13611  break;
13612  case 'y': // crrc
13613  return std::make_pair(0U, &PPC::CRRCRegClass);
13614  }
13615  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
13616  // An individual CR bit.
13617  return std::make_pair(0U, &PPC::CRBITRCRegClass);
13618  } else if ((Constraint == "wa" || Constraint == "wd" ||
13619  Constraint == "wf" || Constraint == "wi") &&
13620  Subtarget.hasVSX()) {
13621  return std::make_pair(0U, &PPC::VSRCRegClass);
13622  } else if (Constraint == "ws" && Subtarget.hasVSX()) {
13623  if (VT == MVT::f32 && Subtarget.hasP8Vector())
13624  return std::make_pair(0U, &PPC::VSSRCRegClass);
13625  else
13626  return std::make_pair(0U, &PPC::VSFRCRegClass);
13627  }
13628 
13629  std::pair<unsigned, const TargetRegisterClass *> R =
13630  TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
13631 
13632  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
13633  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
13634  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
13635  // register.
13636  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
13637  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
13638  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
13639  PPC::GPRCRegClass.contains(R.first))
13640  return std::make_pair(TRI->getMatchingSuperReg(R.first,
13641  PPC::sub_32, &PPC::G8RCRegClass),
13642  &PPC::G8RCRegClass);
13643 
13644  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
13645  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
13646  R.first = PPC::CR0;
13647  R.second = &PPC::CRRCRegClass;
13648  }
13649 
13650  return R;
13651 }
13652 
13653 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
13654 /// vector. If it is invalid, don't add anything to Ops.
13655 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
13656  std::string &Constraint,
13657  std::vector<SDValue>&Ops,
13658  SelectionDAG &DAG) const {
13659  SDValue Result;
13660 
13661  // Only support length 1 constraints.
13662  if (Constraint.length() > 1) return;
13663 
13664  char Letter = Constraint[0];
13665  switch (Letter) {
13666  default: break;
13667  case 'I':
13668  case 'J':
13669  case 'K':
13670  case 'L':
13671  case 'M':
13672  case 'N':
13673  case 'O':
13674  case 'P': {
13675  ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
13676  if (!CST) return; // Must be an immediate to match.
13677  SDLoc dl(Op);
13678  int64_t Value = CST->getSExtValue();
13679  EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
13680  // numbers are printed as such.
13681  switch (Letter) {
13682  default: llvm_unreachable("Unknown constraint letter!");
13683  case 'I': // "I" is a signed 16-bit constant.
13684  if (isInt<16>(Value))
13685  Result = DAG.getTargetConstant(Value, dl, TCVT);
13686  break;
13687  case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
13688  if (isShiftedUInt<16, 16>(Value))
13689  Result = DAG.getTargetConstant(Value, dl, TCVT);
13690  break;
13691  case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
13692  if (isShiftedInt<16, 16>(Value))
13693  Result = DAG.getTargetConstant(Value, dl, TCVT);
13694  break;
13695  case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
13696  if (isUInt<16>(Value))
13697  Result = DAG.getTargetConstant(Value, dl, TCVT);
13698  break;
13699  case 'M': // "M" is a constant that is greater than 31.
13700  if (Value > 31)
13701  Result = DAG.getTargetConstant(Value, dl, TCVT);
13702  break;
13703  case 'N': // "N" is a positive constant that is an exact power of two.
13704  if (Value > 0 && isPowerOf2_64(Value))
13705  Result = DAG.getTargetConstant(Value, dl, TCVT);
13706  break;
13707  case 'O': // "O" is the constant zero.
13708  if (Value == 0)
13709  Result = DAG.getTargetConstant(Value, dl, TCVT);
13710  break;
13711  case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
13712  if (isInt<16>(-Value))
13713  Result = DAG.getTargetConstant(Value, dl, TCVT);
13714  break;
13715  }
13716  break;
13717  }
13718  }
13719 
13720  if (Result.getNode()) {
13721  Ops.push_back(Result);
13722  return;
13723  }
13724 
13725  // Handle standard constraint letters.
13726  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
13727 }
13728 
13729 // isLegalAddressingMode - Return true if the addressing mode represented
13730 // by AM is legal for this target, for a load/store of the specified type.
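 // Editorial examples (illustrative only): under the rules below, "r31",
 // "r31 + 8", and "r31 + r30" are legal addressing modes for scalar types,
 // while "r31 + 8" is rejected for vector types, "global + 8" is rejected
 // because globals cannot be bases, and "2*r31 + 8" is rejected because only
 // scale factors of 0, 1, and 2 (2 only as plain 2*r == r+r) are accepted.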
13731 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
13732  const AddrMode &AM, Type *Ty,
13733  unsigned AS, Instruction *I) const {
13734  // PPC does not allow r+i addressing modes for vectors!
13735  if (Ty->isVectorTy() && AM.BaseOffs != 0)
13736  return false;
13737 
13738  // PPC allows a sign-extended 16-bit immediate field.
13739  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
13740  return false;
13741 
13742  // No global is ever allowed as a base.
13743  if (AM.BaseGV)
13744  return false;
13745 
13746  // PPC only supports r+r addressing.
13747  switch (AM.Scale) {
13748  case 0: // "r+i" or just "i", depending on HasBaseReg.
13749  break;
13750  case 1:
13751  if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
13752  return false;
13753  // Otherwise we have r+r or r+i.
13754  break;
13755  case 2:
13756  if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
13757  return false;
13758  // Allow 2*r as r+r.
13759  break;
13760  default:
13761  // No other scales are supported.
13762  return false;
13763  }
13764 
13765  return true;
13766 }
13767 
13768 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
13769  SelectionDAG &DAG) const {
13770  MachineFunction &MF = DAG.getMachineFunction();
13771  MachineFrameInfo &MFI = MF.getFrameInfo();
13772  MFI.setReturnAddressIsTaken(true);
13773 
13774  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
13775  return SDValue();
13776 
13777  SDLoc dl(Op);
13778  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
13779 
13780  // Make sure the function does not optimize away the store of the RA to
13781  // the stack.
13782  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
13783  FuncInfo->setLRStoreRequired();
13784  bool isPPC64 = Subtarget.isPPC64();
13785  auto PtrVT = getPointerTy(MF.getDataLayout());
13786 
13787  if (Depth > 0) {
13788  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
13789  SDValue Offset =
13790  DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
13791  isPPC64 ? MVT::i64 : MVT::i32);
13792  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
13793  DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
13794  MachinePointerInfo());
13795  }
13796 
13797  // Just load the return address off the stack.
13798  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
13799  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
13800  MachinePointerInfo());
13801 }
13802 
13803 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
13804  SelectionDAG &DAG) const {
13805  SDLoc dl(Op);
13806  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
13807 
13808  MachineFunction &MF = DAG.getMachineFunction();
13809  MachineFrameInfo &MFI = MF.getFrameInfo();
13810  MFI.setFrameAddressIsTaken(true);
13811 
13812  EVT PtrVT = getPointerTy(MF.getDataLayout());
13813  bool isPPC64 = PtrVT == MVT::i64;
13814 
13815  // Naked functions never have a frame pointer, and so we use r1. For all
13816  // other functions, this decision must be delayed until during PEI.
13817  unsigned FrameReg;
13818  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
13819  FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
13820  else
13821  FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
13822 
13823  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
13824  PtrVT);
13825  while (Depth--)
13826  FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
13827  FrameAddr, MachinePointerInfo());
13828  return FrameAddr;
13829 }
13830 
13831 // FIXME? Maybe this could be a TableGen attribute on some registers and
13832 // this table could be generated automatically from RegInfo.
13833 unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
13834  SelectionDAG &DAG) const {
13835  bool isPPC64 = Subtarget.isPPC64();
13836  bool isDarwinABI = Subtarget.isDarwinABI();
13837 
13838  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
13839  (!isPPC64 && VT != MVT::i32))
13840  report_fatal_error("Invalid register global variable type");
13841 
13842  bool is64Bit = isPPC64 && VT == MVT::i64;
13843  unsigned Reg = StringSwitch<unsigned>(RegName)
13844  .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
13845  .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
13846  .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
13847  (is64Bit ? PPC::X13 : PPC::R13))
13848  .Default(0);
13849 
13850  if (Reg)
13851  return Reg;
13852  report_fatal_error("Invalid register name global variable");
13853 }
13854 
13855 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
13856  // The 32-bit SVR4 ABI accesses everything as got-indirect.
13857  if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
13858  return true;
13859 
13860  CodeModel::Model CModel = getTargetMachine().getCodeModel();
13861  // If it is small or large code model, module locals are accessed
13862  // indirectly by loading their address from .toc/.got. The difference
13863  // is that for large code model we have ADDISTocHa + LDtocL and for
13864  // small code model we simply have LDtoc.
13865  if (CModel == CodeModel::Small || CModel == CodeModel::Large)
13866  return true;
13867 
13868  // JumpTable and BlockAddress are accessed as got-indirect.
13869  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
13870  return true;
13871 
13872  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
13873  const GlobalValue *GV = G->getGlobal();
13874  unsigned char GVFlags = Subtarget.classifyGlobalReference(GV);
13875  // The NLP flag indicates that a global access has to use an
13876  // extra indirection.
13877  if (GVFlags & PPCII::MO_NLP_FLAG)
13878  return true;
13879  }
13880 
13881  return false;
13882 }
13883 
13884 bool
13885 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
13886  // The PowerPC target isn't yet aware of offsets.
13887  return false;
13888 }
13889 
13890 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
13891  const CallInst &I,
13892  MachineFunction &MF,
13893  unsigned Intrinsic) const {
13894  switch (Intrinsic) {
13895  case Intrinsic::ppc_qpx_qvlfd:
13896  case Intrinsic::ppc_qpx_qvlfs:
13897  case Intrinsic::ppc_qpx_qvlfcd:
13898  case Intrinsic::ppc_qpx_qvlfcs:
13899  case Intrinsic::ppc_qpx_qvlfiwa:
13900  case Intrinsic::ppc_qpx_qvlfiwz:
13901  case Intrinsic::ppc_altivec_lvx:
13902  case Intrinsic::ppc_altivec_lvxl:
13903  case Intrinsic::ppc_altivec_lvebx:
13904  case Intrinsic::ppc_altivec_lvehx:
13905  case Intrinsic::ppc_altivec_lvewx:
13906  case Intrinsic::ppc_vsx_lxvd2x:
13907  case Intrinsic::ppc_vsx_lxvw4x: {
13908  EVT VT;
13909  switch (Intrinsic) {
13910  case Intrinsic::ppc_altivec_lvebx:
13911  VT = MVT::i8;
13912  break;
13913  case Intrinsic::ppc_altivec_lvehx:
13914  VT = MVT::i16;
13915  break;
13916  case Intrinsic::ppc_altivec_lvewx:
13917  VT = MVT::i32;
13918  break;
13919  case Intrinsic::ppc_vsx_lxvd2x:
13920  VT = MVT::v2f64;
13921  break;
13922  case Intrinsic::ppc_qpx_qvlfd:
13923  VT = MVT::v4f64;
13924  break;
13925  case Intrinsic::ppc_qpx_qvlfs:
13926  VT = MVT::v4f32;
13927  break;
13928  case Intrinsic::ppc_qpx_qvlfcd:
13929  VT = MVT::v2f64;
13930  break;
13931  case Intrinsic::ppc_qpx_qvlfcs:
13932  VT = MVT::v2f32;
13933  break;
13934  default:
13935  VT = MVT::v4i32;
13936  break;
13937  }
13938 
13939  Info.opc = ISD::INTRINSIC_W_CHAIN;
13940  Info.memVT = VT;
13941  Info.ptrVal = I.getArgOperand(0);
13942  Info.offset = -VT.getStoreSize()+1;
13943  Info.size = 2*VT.getStoreSize()-1;
13944  Info.align = 1;
13945  Info.flags = MachineMemOperand::MOLoad;
13946  return true;
13947  }
13948  case Intrinsic::ppc_qpx_qvlfda:
13949  case Intrinsic::ppc_qpx_qvlfsa:
13950  case Intrinsic::ppc_qpx_qvlfcda:
13951  case Intrinsic::ppc_qpx_qvlfcsa:
13952  case Intrinsic::ppc_qpx_qvlfiwaa:
13953  case Intrinsic::ppc_qpx_qvlfiwza: {
13954  EVT VT;
13955  switch (Intrinsic) {
13956  case Intrinsic::ppc_qpx_qvlfda:
13957  VT = MVT::v4f64;
13958  break;
13959  case Intrinsic::ppc_qpx_qvlfsa:
13960  VT = MVT::v4f32;
13961  break;
13962  case Intrinsic::ppc_qpx_qvlfcda:
13963  VT = MVT::v2f64;
13964  break;
13965  case Intrinsic::ppc_qpx_qvlfcsa:
13966  VT = MVT::v2f32;
13967  break;
13968  default:
13969  VT = MVT::v4i32;
13970  break;
13971  }
13972 
13973  Info.opc = ISD::INTRINSIC_W_CHAIN;
13974  Info.memVT = VT;
13975  Info.ptrVal = I.getArgOperand(0);
13976  Info.offset = 0;
13977  Info.size = VT.getStoreSize();
13978  Info.align = 1;
13979  Info.flags = MachineMemOperand::MOLoad;
13980  return true;
13981  }
13982  case Intrinsic::ppc_qpx_qvstfd:
13983  case Intrinsic::ppc_qpx_qvstfs:
13984  case Intrinsic::ppc_qpx_qvstfcd:
13985  case Intrinsic::ppc_qpx_qvstfcs:
13986  case Intrinsic::ppc_qpx_qvstfiw:
13987  case Intrinsic::ppc_altivec_stvx:
13988  case Intrinsic::ppc_altivec_stvxl:
13989  case Intrinsic::ppc_altivec_stvebx:
13990  case Intrinsic::ppc_altivec_stvehx:
13991  case Intrinsic::ppc_altivec_stvewx:
13992  case Intrinsic::ppc_vsx_stxvd2x:
13993  case Intrinsic::ppc_vsx_stxvw4x: {
13994  EVT VT;
13995  switch (Intrinsic) {
13996  case Intrinsic::ppc_altivec_stvebx:
13997  VT = MVT::i8;
13998  break;
13999  case Intrinsic::ppc_altivec_stvehx:
14000  VT = MVT::i16;
14001  break;
14002  case Intrinsic::ppc_altivec_stvewx:
14003  VT = MVT::i32;
14004  break;
14005  case Intrinsic::ppc_vsx_stxvd2x:
14006  VT = MVT::v2f64;
14007  break;
14008  case Intrinsic::ppc_qpx_qvstfd:
14009  VT = MVT::v4f64;
14010  break;
14011  case Intrinsic::ppc_qpx_qvstfs:
14012  VT = MVT::v4f32;
14013  break;
14014  case Intrinsic::ppc_qpx_qvstfcd:
14015  VT = MVT::v2f64;
14016  break;
14017  case Intrinsic::ppc_qpx_qvstfcs:
14018  VT = MVT::v2f32;
14019  break;
14020  default:
14021  VT = MVT::v4i32;
14022  break;
14023  }
14024 
14025  Info.opc = ISD::INTRINSIC_VOID;
14026  Info.memVT = VT;
14027  Info.ptrVal = I.getArgOperand(1);
14028  Info.offset = -VT.getStoreSize()+1;
14029  Info.size = 2*VT.getStoreSize()-1;
14030  Info.align = 1;
14031  Info.flags = MachineMemOperand::MOStore;
14032  return true;
14033  }
14034  case Intrinsic::ppc_qpx_qvstfda:
14035  case Intrinsic::ppc_qpx_qvstfsa:
14036  case Intrinsic::ppc_qpx_qvstfcda:
14037  case Intrinsic::ppc_qpx_qvstfcsa:
14038  case Intrinsic::ppc_qpx_qvstfiwa: {
14039  EVT VT;
14040  switch (Intrinsic) {
14041  case Intrinsic::ppc_qpx_qvstfda:
14042  VT = MVT::v4f64;
14043  break;
14044  case Intrinsic::ppc_qpx_qvstfsa:
14045  VT = MVT::v4f32;
14046  break;
14047  case Intrinsic::ppc_qpx_qvstfcda:
14048  VT = MVT::v2f64;
14049  break;
14050  case Intrinsic::ppc_qpx_qvstfcsa:
14051  VT = MVT::v2f32;
14052  break;
14053  default:
14054  VT = MVT::v4i32;
14055  break;
14056  }
14057 
14058  Info.opc = ISD::INTRINSIC_VOID;
14059  Info.memVT = VT;
14060  Info.ptrVal = I.getArgOperand(1);
14061  Info.offset = 0;
14062  Info.size = VT.getStoreSize();
14063  Info.align = 1;
14064  Info.flags = MachineMemOperand::MOStore;
14065  return true;
14066  }
14067  default:
14068  break;
14069  }
14070 
14071  return false;
14072 }
14073 
14074 /// getOptimalMemOpType - Returns the target specific optimal type for load
14075 /// and store operations as a result of memset, memcpy, and memmove
14076 /// lowering. If DstAlign is zero, that means the destination alignment can
14077 /// satisfy any constraint. Similarly, if SrcAlign is zero, there is no need
14078 /// to check it against the alignment requirement,
14079 /// probably because the source does not need to be loaded. If 'IsMemset' is
14080 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
14081 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
14082 /// source is constant so it does not need to be loaded.
14083 /// It returns EVT::Other if the type should be determined using generic
14084 /// target-independent logic.
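 // Editorial example (illustrative only): on a 64-bit subtarget with VSX at
 // -O2, a 32-byte memcpy between 16-byte-aligned buffers would be expanded
 // using two MVT::v4i32 (16-byte) operations from the rule below, while the
 // same copy at -O0 falls through to MVT::i64 chunks.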
14085 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
14086  unsigned DstAlign, unsigned SrcAlign,
14087  bool IsMemset, bool ZeroMemset,
14088  bool MemcpyStrSrc,
14089  MachineFunction &MF) const {
14090  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
14091  const Function &F = MF.getFunction();
14092  // When expanding a memset, require at least two QPX instructions to cover
14093  // the cost of loading the value to be stored from the constant pool.
14094  if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
14095  (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
14096  !F.hasFnAttribute(Attribute::NoImplicitFloat))
14097  return MVT::v4f64;
14098  }
14099 
14100  // We should use Altivec/VSX loads and stores when available. For unaligned
14101  // addresses, unaligned VSX loads are only fast starting with the P8.
14102  if (Subtarget.hasAltivec() && Size >= 16 &&
14103  (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
14104  ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
14105  return MVT::v4i32;
14106  }
14107 
14108  if (Subtarget.isPPC64()) {
14109  return MVT::i64;
14110  }
14111 
14112  return MVT::i32;
14113 }
14114 
14115 /// Returns true if it is beneficial to convert a load of a constant
14116 /// to just the constant itself.
14117 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
14118  Type *Ty) const {
14119  assert(Ty->isIntegerTy());
14120 
14121  unsigned BitSize = Ty->getPrimitiveSizeInBits();
14122  return !(BitSize == 0 || BitSize > 64);
14123 }
14124 
14125 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
14126  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
14127  return false;
14128  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
14129  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
14130  return NumBits1 == 64 && NumBits2 == 32;
14131 }
14132 
14134  if (!VT1.isInteger() || !VT2.isInteger())
14135  return false;
14136  unsigned NumBits1 = VT1.getSizeInBits();
14137  unsigned NumBits2 = VT2.getSizeInBits();
14138  return NumBits1 == 64 && NumBits2 == 32;
14139 }
14140 
14142  // Generally speaking, zexts are not free, but they are free when they can be
14143  // folded with other operations.
14144  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
14145  EVT MemVT = LD->getMemoryVT();
14146  if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
14147  (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
14148  (LD->getExtensionType() == ISD::NON_EXTLOAD ||
14149  LD->getExtensionType() == ISD::ZEXTLOAD))
14150  return true;
14151  }
14152 
14153  // FIXME: Add other cases...
14154  // - 32-bit shifts with a zext to i64
14155  // - zext after ctlz, bswap, etc.
14156  // - zext after and by a constant mask
14157 
14158  return TargetLowering::isZExtFree(Val, VT2);
14159 }
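// The loads accepted above are free to zero-extend because the PPC byte,
// halfword and word load instructions (lbz, lhz, and lwz into a 64-bit GPR)
// already clear the high-order bits of the destination register, so no
// separate extension instruction is needed.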
14160 
14161 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
14162  assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
14163  "invalid fpext types");
14164  // Extending to float128 is not free.
14165  if (DestVT == MVT::f128)
14166  return false;
14167  return true;
14168 }
14169 
14171  return isInt<16>(Imm) || isUInt<16>(Imm);
14172 }
14173 
14175  return isInt<16>(Imm) || isUInt<16>(Imm);
14176 }
14177 
14179  unsigned,
14180  unsigned,
14181  bool *Fast) const {
14182  if (DisablePPCUnaligned)
14183  return false;
14184 
14185  // PowerPC supports unaligned memory access for simple non-vector types.
14186  // Although accessing unaligned addresses is not as efficient as accessing
14187  // aligned addresses, it is generally more efficient than manual expansion,
14188  // and it generally only traps to software emulation when crossing page
14189  // boundaries.
14190 
14191  if (!VT.isSimple())
14192  return false;
14193 
14194  if (VT.getSimpleVT().isVector()) {
14195  if (Subtarget.hasVSX()) {
14196  if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
14197  VT != MVT::v4f32 && VT != MVT::v4i32)
14198  return false;
14199  } else {
14200  return false;
14201  }
14202  }
14203 
14204  if (VT == MVT::ppcf128)
14205  return false;
14206 
14207  if (Fast)
14208  *Fast = true;
14209 
14210  return true;
14211 }
14212 
14214  VT = VT.getScalarType();
14215 
14216  if (!VT.isSimple())
14217  return false;
14218 
14219  switch (VT.getSimpleVT().SimpleTy) {
14220  case MVT::f32:
14221  case MVT::f64:
14222  return true;
14223  case MVT::f128:
14224  return (EnableQuadPrecision && Subtarget.hasP9Vector());
14225  default:
14226  break;
14227  }
14228 
14229  return false;
14230 }
14231 
14232 const MCPhysReg *
14234  // LR is a callee-save register, but we must treat it as clobbered by any call
14235  // site. Hence we include LR in the scratch registers, which are in turn added
14236  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
14237  // to CTR, which is used by any indirect call.
14238  static const MCPhysReg ScratchRegs[] = {
14239  PPC::X12, PPC::LR8, PPC::CTR8, 0
14240  };
14241 
14242  return ScratchRegs;
14243 }
14244 
14246  const Constant *PersonalityFn) const {
14247  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
14248 }
14249 
14251  const Constant *PersonalityFn) const {
14252  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
14253 }
14254 
14255 bool
14257  EVT VT , unsigned DefinedValues) const {
14258  if (VT == MVT::v2i64)
14259  return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
14260 
14261  if (Subtarget.hasVSX() || Subtarget.hasQPX())
14262  return true;
14263 
14264  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
14265 }
14266 
14268  if (DisableILPPref || Subtarget.enableMachineScheduler())
14270 
14271  return Sched::ILP;
14272 }
14273 
14274 // Create a fast isel object.
14275 FastISel *
14277  const TargetLibraryInfo *LibInfo) const {
14278  return PPC::createFastISel(FuncInfo, LibInfo);
14279 }
14280 
14282  if (Subtarget.isDarwinABI()) return;
14283  if (!Subtarget.isPPC64()) return;
14284 
14285  // Update IsSplitCSR in PPCFunctionInfo
14286  PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
14287  PFI->setIsSplitCSR(true);
14288 }
14289 
14291  MachineBasicBlock *Entry,
14292  const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
14293  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
14294  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
14295  if (!IStart)
14296  return;
14297 
14298  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
14299  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
14300  MachineBasicBlock::iterator MBBI = Entry->begin();
14301  for (const MCPhysReg *I = IStart; *I; ++I) {
14302  const TargetRegisterClass *RC = nullptr;
14303  if (PPC::G8RCRegClass.contains(*I))
14304  RC = &PPC::G8RCRegClass;
14305  else if (PPC::F8RCRegClass.contains(*I))
14306  RC = &PPC::F8RCRegClass;
14307  else if (PPC::CRRCRegClass.contains(*I))
14308  RC = &PPC::CRRCRegClass;
14309  else if (PPC::VRRCRegClass.contains(*I))
14310  RC = &PPC::VRRCRegClass;
14311  else
14312  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
14313 
14314  unsigned NewVR = MRI->createVirtualRegister(RC);
14315  // Create copy from CSR to a virtual register.
14316  // FIXME: this currently does not emit CFI pseudo-instructions, but it works
14317  // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
14318  // nounwind. If we want to generalize this later, we may need to emit
14319  // CFI pseudo-instructions.
14322  "Function should be nounwind in insertCopiesSplitCSR!");
14323  Entry->addLiveIn(*I);
14324  BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
14325  .addReg(*I);
14326 
14327  // Insert the copy-back instructions right before the terminator
14328  for (auto *Exit : Exits)
14329  BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
14330  TII->get(TargetOpcode::COPY), *I)
14331  .addReg(NewVR);
14332  }
14333 }
14334 
14335 // Override to enable LOAD_STACK_GUARD lowering on Linux.
14337  if (!Subtarget.isTargetLinux())
14338  return TargetLowering::useLoadStackGuardNode();
14339  return true;
14340 }
14341 
14342 // Override to disable global variable loading on Linux.
14344  if (!Subtarget.isTargetLinux())
14346 }
14347 
14348 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
14349  if (!VT.isSimple() || !Subtarget.hasVSX())
14350  return false;
14351 
14352  switch(VT.getSimpleVT().SimpleTy) {
14353  default:
14354  // For FP types that are not currently supported by the PPC backend, return
14355  // false. Examples: f16, f80.
14356  return false;
14357  case MVT::f32:
14358  case MVT::f64:
14359  case MVT::ppcf128:
14360  return Imm.isPosZero();
14361  }
14362 }
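// Note that only +0.0 is reported as legal: with VSX it can be materialized
// directly by XORing a register with itself (xxlxor), so no constant-pool
// load is needed; other FP immediates still come from memory.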
14363 
14364 // For vector shift operation op, fold
14365 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
14367  SelectionDAG &DAG) {
14368  SDValue N0 = N->getOperand(0);
14369  SDValue N1 = N->getOperand(1);
14370  EVT VT = N0.getValueType();
14371  unsigned OpSizeInBits = VT.getScalarSizeInBits();
14372  unsigned Opcode = N->getOpcode();
14373  unsigned TargetOpcode;
14374 
14375  switch (Opcode) {
14376  default:
14377  llvm_unreachable("Unexpected shift operation");
14378  case ISD::SHL:
14379  TargetOpcode = PPCISD::SHL;
14380  break;
14381  case ISD::SRL:
14382  TargetOpcode = PPCISD::SRL;
14383  break;
14384  case ISD::SRA:
14385  TargetOpcode = PPCISD::SRA;
14386  break;
14387  }
14388 
14389  if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
14390  N1->getOpcode() == ISD::AND)
14392  if (Mask->getZExtValue() == OpSizeInBits - 1)
14393  return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
14394 
14395  return SDValue();
14396 }
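// Dropping the mask is safe because the vector shift instructions this
// lowers to (e.g. vslw/vsrw/vsraw for v4i32) already use only the low
// log2(element-width) bits of each shift-amount element, so an explicit
// "and" with (numbits(x) - 1) is redundant in hardware.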
14397 
14398 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
14399  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
14400  return Value;
14401 
14402  SDValue N0 = N->getOperand(0);
14404  if (!Subtarget.isISA3_0() ||
14405  N0.getOpcode() != ISD::SIGN_EXTEND ||
14406  N0.getOperand(0).getValueType() != MVT::i32 ||
14407  CN1 == nullptr || N->getValueType(0) != MVT::i64)
14408  return SDValue();
14409 
14410  // We can't save an operation here if the value is already extended, and
14411  // the existing shift is easier to combine.
14412  SDValue ExtsSrc = N0.getOperand(0);
14413  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
14414  ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
14415  return SDValue();
14416 
14417  SDLoc DL(N0);
14418  SDValue ShiftBy = SDValue(CN1, 0);
14419  // We want the shift amount to be i32 on the extswsli, but it may come in
14420  // as an i64.
14421  if (ShiftBy.getValueType() == MVT::i64)
14422  ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
14423 
14424  return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
14425  ShiftBy);
14426 }
14427 
14428 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
14429  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
14430  return Value;
14431 
14432  return SDValue();
14433 }
14434 
14435 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
14436  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
14437  return Value;
14438 
14439  return SDValue();
14440 }
14441 
14442 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
14443 // Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
14444 // When C is zero, the equation (addi Z, -C) can be simplified to Z
14445 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
14447  const PPCSubtarget &Subtarget) {
14448  if (!Subtarget.isPPC64())
14449  return SDValue();
14450 
14451  SDValue LHS = N->getOperand(0);
14452  SDValue RHS = N->getOperand(1);
14453 
14454  auto isZextOfCompareWithConstant = [](SDValue Op) {
14455  if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
14456  Op.getValueType() != MVT::i64)
14457  return false;
14458 
14459  SDValue Cmp = Op.getOperand(0);
14460  if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
14461  Cmp.getOperand(0).getValueType() != MVT::i64)
14462  return false;
14463 
14464  if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
14465  int64_t NegConstant = 0 - Constant->getSExtValue();
14466  // Due to the limitations of the addi instruction,
14467  // -C is required to be in [-32768, 32767].
14468  return isInt<16>(NegConstant);
14469  }
14470 
14471  return false;
14472  };
14473 
14474  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
14475  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
14476 
14477  // If there is a pattern, canonicalize a zext operand to the RHS.
14478  if (LHSHasPattern && !RHSHasPattern)
14479  std::swap(LHS, RHS);
14480  else if (!LHSHasPattern && !RHSHasPattern)
14481  return SDValue();
14482 
14483  SDLoc DL(N);
14484  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
14485  SDValue Cmp = RHS.getOperand(0);
14486  SDValue Z = Cmp.getOperand(0);
14487  auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
14488 
14489  assert(Constant && "Constant Should not be a null pointer.");
14490  int64_t NegConstant = 0 - Constant->getSExtValue();
14491 
14492  switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
14493  default: break;
14494  case ISD::SETNE: {
14495  // when C == 0
14496  // --> addze X, (addic Z, -1).carry
14497  // /
14498  // add X, (zext(setne Z, C))--
14499  // \ when -32768 <= -C <= 32767 && C != 0
14500  // --> addze X, (addic (addi Z, -C), -1).carry
14501  SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
14502  DAG.getConstant(NegConstant, DL, MVT::i64));
14503  SDValue AddOrZ = NegConstant != 0 ? Add : Z;
14504  SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
14505  AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
14506  return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
14507  SDValue(Addc.getNode(), 1));
14508  }
14509  case ISD::SETEQ: {
14510  // when C == 0
14511  // --> addze X, (subfic Z, 0).carry
14512  // /
14513  // add X, (zext(sete Z, C))--
14514  // \ when -32768 <= -C <= 32767 && C != 0
14515  // --> addze X, (subfic (addi Z, -C), 0).carry
14516  SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
14517  DAG.getConstant(NegConstant, DL, MVT::i64));
14518  SDValue AddOrZ = NegConstant != 0 ? Add : Z;
14519  SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
14520  DAG.getConstant(0, DL, MVT::i64), AddOrZ);
14521  return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
14522  SDValue(Subc.getNode(), 1));
14523  }
14524  }
14525 
14526  return SDValue();
14527 }
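// Why the carry trick works: "addic AddOrZ, -1" produces a carry-out of 1
// exactly when AddOrZ (i.e. Z - C) is nonzero, so the following addze adds
// the setne result to X; "subfic AddOrZ, 0" produces a carry-out of 1
// exactly when Z - C is zero, which gives the sete case.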
14528 
14529 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
14530  if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
14531  return Value;
14532 
14533  return SDValue();
14534 }
14535 
14536 // Detect TRUNCATE operations on bitcasts of float128 values.
14537 // What we are looking for here is the situation where we extract a subset
14538 // of bits from a 128-bit float.
14539 // This can be of two forms:
14540 // 1) BITCAST of f128 feeding TRUNCATE
14541 // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
14542 // This is needed because we do not have a legal i128 type, and we want to
14543 // avoid having to store the f128 and then reload part
14544 // of it.
14545 SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
14546  DAGCombinerInfo &DCI) const {
14547  // If we are using CRBits then try that first.
14548  if (Subtarget.useCRBits()) {
14549  // Check if CRBits did anything and return that if it did.
14550  if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
14551  return CRTruncValue;
14552  }
14553 
14554  SDLoc dl(N);
14555  SDValue Op0 = N->getOperand(0);
14556 
14557  // Looking for a truncate of i128 to i64.
14558  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
14559  return SDValue();
14560 
14561  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
14562 
14563  // SRL feeding TRUNCATE.
14564  if (Op0.getOpcode() == ISD::SRL) {
14565  ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
14566  // The right shift has to be by 64 bits.
14567  if (!ConstNode || ConstNode->getZExtValue() != 64)
14568  return SDValue();
14569 
14570  // Switch the element number to extract.
14571  EltToExtract = EltToExtract ? 0 : 1;
14572  // Update Op0 past the SRL.
14573  Op0 = Op0.getOperand(0);
14574  }
14575 
14576  // BITCAST feeding a TRUNCATE possibly via SRL.
14577  if (Op0.getOpcode() == ISD::BITCAST &&
14578  Op0.getValueType() == MVT::i128 &&
14579  Op0.getOperand(0).getValueType() == MVT::f128) {
14580  SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
14581  return DCI.DAG.getNode(
14582  ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
14583  DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
14584  }
14585  return SDValue();
14586 }
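// For example, on a big-endian target (trunc (srl (bitcast f128 X to i128),
// 64) to i64) becomes (extract_vector_elt (bitcast X to v2i64), 0), reading
// the high doubleword of the f128 straight out of the vector register
// instead of spilling it to memory first.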
14587 
14588 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
14589  // Only duplicate to increase tail-call opportunities for the 64-bit SysV ABIs.
14590  if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
14591  return false;
14592 
14593  // If not a tail call then no need to proceed.
14594  if (!CI->isTailCall())
14595  return false;
14596 
14597  // If tail calls are disabled for the caller then we are done.
14598  const Function *Caller = CI->getParent()->getParent();
14599  auto Attr = Caller->getFnAttribute("disable-tail-calls");
14600  if (Attr.getValueAsString() == "true")
14601  return false;
14602 
14603  // If sibling calls have been disabled and tail calls aren't guaranteed,
14604  // there is no reason to duplicate.
14605  auto &TM = getTargetMachine();
14606  if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
14607  return false;
14608 
14609  // Can't tail call a function called indirectly, or if it has variadic args.
14610  const Function *Callee = CI->getCalledFunction();
14611  if (!Callee || Callee->isVarArg())
14612  return false;
14613 
14614  // Make sure the callee and caller calling conventions are eligible for tco.
14616  CI->getCallingConv()))
14617  return false;
14618 
14619  // If the function is local, then we have a good chance of tail-calling it.
14620  return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
14621 }
14622 
14623 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
14624  if (!Subtarget.hasVSX())
14625  return false;
14626  if (Subtarget.hasP9Vector() && VT == MVT::f128)
14627  return true;
14628  return VT == MVT::f32 || VT == MVT::f64 ||
14629  VT == MVT::v4f32 || VT == MVT::v2f64;
14630 }
14631 
14632 bool PPCTargetLowering::
14633 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
14634  const Value *Mask = AndI.getOperand(1);
14635  // If the mask is suitable for andi. or andis. we should sink the and.
14636  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
14637  // Can't handle constants wider than 64 bits.
14638  if (CI->getBitWidth() > 64)
14639  return false;
14640  int64_t ConstVal = CI->getZExtValue();
14641  return isUInt<16>(ConstVal) ||
14642  (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
14643  }
14644 
14645  // For non-constant masks, we can always use the record-form and.
14646  return true;
14647 }
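// The two accepted shapes match the record-form immediates: andi. takes an
// unsigned 16-bit mask in the low halfword and andis. takes one in the next
// halfword up, so either kind of mask can be applied and compared against
// zero in a single instruction.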
14648 
14649 // Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
14650 // Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
14651 // Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
14652 // Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
14653 // Transform (abs (sub a, b)) to (vabsd a b 1) if a and b are of type v4i32
14654 SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
14655  assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
14656  assert(Subtarget.hasP9Altivec() &&
14657  "Only combine this when P9 altivec supported!");
14658  EVT VT = N->getValueType(0);
14659  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
14660  return SDValue();
14661 
14662  SelectionDAG &DAG = DCI.DAG;
14663  SDLoc dl(N);
14664  if (N->getOperand(0).getOpcode() == ISD::SUB) {
14665  // Even for signed integers, the difference is known to be non-negative
14666  // (as a signed integer) because both inputs are zero-extended.
14667  unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
14668  unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
14669  if ((SubOpcd0 == ISD::ZERO_EXTEND ||
14670  SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
14671  (SubOpcd1 == ISD::ZERO_EXTEND ||
14672  SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
14673  return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
14674  N->getOperand(0)->getOperand(0),
14675  N->getOperand(0)->getOperand(1),
14676  DAG.getTargetConstant(0, dl, MVT::i32));
14677  }
14678 
14679  // For type v4i32, it can be optimized with xvnegsp + vabsduw
14680  if (N->getOperand(0).getValueType() == MVT::v4i32 &&
14681  N->getOperand(0).hasOneUse()) {
14682  return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
14683  N->getOperand(0)->getOperand(0),
14684  N->getOperand(0)->getOperand(1),
14685  DAG.getTargetConstant(1, dl, MVT::i32));
14686  }
14687  }
14688 
14689  return SDValue();
14690 }
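// In the v4i32 case the trailing flag of 1 indicates that the inputs may be
// negative; the expansion is expected to flip the sign bit of each word
// with xvnegsp (equivalent to adding 0x80000000) so that the unsigned
// vabsduw still computes |a - b| for signed inputs.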
14691 
14692 // For type v4i32/v8i16/v16i8, transform
14693 // from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
14694 // from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
14695 // from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
14696 // from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
14697 SDValue PPCTargetLowering::combineVSelect(SDNode *N,
14698  DAGCombinerInfo &DCI) const {
14699  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
14700  assert(Subtarget.hasP9Altivec() &&
14701  "Only combine this when P9 altivec supported!");
14702 
14703  SelectionDAG &DAG = DCI.DAG;
14704  SDLoc dl(N);
14705  SDValue Cond = N->getOperand(0);
14706  SDValue TrueOpnd = N->getOperand(1);
14707  SDValue FalseOpnd = N->getOperand(2);
14708  EVT VT = N->getOperand(1).getValueType();
14709 
14710  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
14711  FalseOpnd.getOpcode() != ISD::SUB)
14712  return SDValue();
14713 
14714  // VABSD is only available for type v4i32/v8i16/v16i8.
14715  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
14716  return SDValue();
14717 
14718  // Bail out unless at least one operand has a single use, so the combine saves at least one dependent computation.
14719  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
14720  return SDValue();
14721 
14722  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14723 
14724  // Can only handle unsigned comparison here
14725  switch (CC) {
14726  default:
14727  return SDValue();
14728  case ISD::SETUGT:
14729  case ISD::SETUGE:
14730  break;
14731  case ISD::SETULT:
14732  case ISD::SETULE:
14733  std::swap(TrueOpnd, FalseOpnd);
14734  break;
14735  }
14736 
14737  SDValue CmpOpnd1 = Cond.getOperand(0);
14738  SDValue CmpOpnd2 = Cond.getOperand(1);
14739 
14740  // SETCC CmpOpnd1 CmpOpnd2 cond
14741  // TrueOpnd = CmpOpnd1 - CmpOpnd2
14742  // FalseOpnd = CmpOpnd2 - CmpOpnd1
14743  if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
14744  TrueOpnd.getOperand(1) == CmpOpnd2 &&
14745  FalseOpnd.getOperand(0) == CmpOpnd2 &&
14746  FalseOpnd.getOperand(1) == CmpOpnd1) {
14747  return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
14748  CmpOpnd1, CmpOpnd2,
14749  DAG.getTargetConstant(0, dl, MVT::i32));
14750  }
14751 
14752  return SDValue();
14753 }
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:177
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:764
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
cl::opt< bool > ANDIGlueBug
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
const NoneType None
Definition: None.h:24
void setFrameAddressIsTaken(bool T)
virtual bool isJumpTableRelative() const
uint64_t CallInst * C
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:571
bool isInvariant() const
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
X = FP_ROUND(Y, TRUNC) - Rounding &#39;Y&#39; from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:538
User::const_op_iterator arg_iterator
The type of iterator to use when looping over actual arguments at this call site. ...
Definition: CallSite.h:213
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:90
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set, or Regs.size() if they are all allocated.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode *> &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators...
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:259
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:594
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
EVT getValueType() const
Return the ValueType of the referenced return value.
static unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall, bool isPatchPoint, bool hasNest, SmallVectorImpl< std::pair< unsigned, SDValue >> &RegsToPass, SmallVectorImpl< SDValue > &Ops, std::vector< EVT > &NodeTys, ImmutableCallSite CS, const PPCSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
FormattedString left_justify(StringRef Str, unsigned Width)
left_justify - append spaces after string so total output is Width characters.
Definition: Format.h:145
Return with a flag operand, matched by &#39;blr&#39;.
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
static Instruction * callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
const SDValue & getOffset() const
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
void setVarArgsNumGPR(unsigned Num)
bool isUndef() const
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:35
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand...
const GlobalValue * getGlobal() const
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.
Definition: ISDOpcodes.h:184
unsigned getRegisterByName(const char *RegName, EVT VT, SelectionDAG &DAG) const override
Return the register ID of the name passed in.
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1563
bool isAccessedAsGotIndirect(SDValue N) const
bool hasFPCVT() const
Definition: PPCSubtarget.h:241
#define R4(n)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:562
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
LLVMContext & Context
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
QVFPERM = This corresponds to the QPX qvfperm instruction.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC&#39;s if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:937
unsigned arg_size() const
Definition: CallSite.h:219
virtual void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DstReg, ArrayRef< MachineOperand > Cond, unsigned TrueReg, unsigned FalseReg) const
Insert a select instruction into MBB before I that will copy TrueReg to DstReg when Cond is true...
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it&#39;s not CSE&#39;d)...
Definition: SelectionDAG.h:836
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
Atomic ordering constants.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:323
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector...
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an vector value) starting with the ...
Definition: ISDOpcodes.h:358
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:650
This class represents lattice values for constants.
Definition: AllocatorList.h:24
unsigned getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not...
GPRC = address of GLOBAL_OFFSET_TABLE.
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:367
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:260
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
LLVM_NODISCARD bool equals_lower(StringRef RHS) const
equals_lower - Check for string equality, ignoring case.
Definition: StringRef.h:176
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State)
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
Shadow Stack GC Lowering
bool CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State)
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:519
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
bool isVector() const
Return true if this is a vector value type.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
class llvm::RegisterBankInfo GPR
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
const SDValue & getBasePtr() const
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:223
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
unsigned getReg() const
getReg - Returns the register number.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:138
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:858
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:98
const SDValue & getValue() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain...
Definition: ISDOpcodes.h:699
This class represents a function call, abstracting a target machine&#39;s calling convention.
unsigned Reg
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:648
bool hasVSX() const
Definition: PPCSubtarget.h:246
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
QBRC, CHAIN = QVLFSb CHAIN, Ptr The 4xf32 load used for v4i1 constants.
bool hasFSQRT() const
Definition: PPCSubtarget.h:232
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the &#39;usesCustomInserter&#39; fla...
static MVT getFloatingPointVT(unsigned BitWidth)
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction...
const SDValue & getChain() const
Function Alias Analysis Results
bool hasQPX() const
Definition: PPCSubtarget.h:245
uint64_t getSize() const
Return the size in bytes of the memory reference.
static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, SDValue GA)
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:131
unsigned getValNo() const
bool hasAtomicLoad() const
Return true if this atomic instruction loads from memory.
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
unsigned getAlignment() const
unsigned getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:321
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
unsigned second
static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
virtual const TargetRegisterClass * getRegClassFor(MVT VT) const
Return the register class that should be used for the specified value type.
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:306
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:141
F(f)
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CALL - A direct function call.
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
#define R2(n)
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags=0)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
static BranchProbability getOne()
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
This defines the Use class.
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offs=0, bool isT=false, unsigned char TargetFlags=0)
void setVarArgsNumFPR(unsigned Num)
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool hasSPE() const
Definition: PPCSubtarget.h:243
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
Floating-point-to-interger conversion instructions.
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
unsigned getVarArgsNumGPR() const
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it...
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:435
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:39
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructions which can compare a register against the immediate without having to materialize the immediate into a register.
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:998
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:316
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
return AArch64::GPR64RegClass contains(Reg)
SDValue getExternalSymbol(const char *Sym, EVT VT)
Instruction * emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:159
bool isMemLoc() const
GlobalBaseReg - On Darwin, this node represents the result of the mflr at function entry...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align=1, bool *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation...
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic...
Definition: ISDOpcodes.h:114
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:210
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1135
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:136
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
Definition: ISDOpcodes.h:456
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
The address of a basic block.
Definition: Constants.h:840
void setVarArgsStackOffset(int Offset)
bool hasOneUse() const
Return true if there is exactly one use of this node.
A description of a memory reference used in the backend.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset...
const_iterator begin() const
Definition: SmallSet.h:224
bool isISA3_0() const
Definition: PPCSubtarget.h:290
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
const HexagonInstrInfo * TII
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:164
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
Shift and rotation operations.
Definition: ISDOpcodes.h:410
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
Class to represent struct types.
Definition: DerivedTypes.h:201
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
Base class for LoadSDNode and StoreSDNode.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef...
bool hasDirectMove() const
Definition: PPCSubtarget.h:271
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth...
Definition: ISDOpcodes.h:393
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
IterTy arg_end() const
Definition: CallSite.h:575
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, unsigned MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static cl::opt< bool > EnableQuadPrecision("enable-ppc-quad-precision", cl::desc("enable quad precision float support on ppc"), cl::Hidden)
CallLoweringInfo & setChain(SDValue InChain)
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:191
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
static bool isLoad(int Opcode)
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:36
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
op_iterator op_end() const
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
unsigned getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
FLT_ROUNDS_ - Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest 2 Round to ...
Definition: ISDOpcodes.h:546
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn&#39;t supported on the target and indicate what to d...
SimpleValueType SimpleTy
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:304
bool isInConsecutiveRegs() const
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA)...
Definition: ISDOpcodes.h:96
The memory access is dereferenceable (i.e., doesn&#39;t trap).
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned Intr
Direct move from a GPR to a VSX register (algebraic)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:460
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:401
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG...
Definition: ISDOpcodes.h:73
const BlockAddress * getBlockAddress() const
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE R Default(T Value)
Definition: StringSwitch.h:203
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
LocInfo getLocInfo() const
QVALIGNI = This corresponds to the QPX qvaligni instruction.
bool isTargetELF() const
Definition: PPCSubtarget.h:305
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
zlib-gnu style compression
This file implements a class to represent arbitrary precision integral constant values and operations...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
This represents a list of ValueType&#39;s that has been intern&#39;d by a SelectionDAG.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
bool useSoftFloat() const
Definition: PPCSubtarget.h:211
SmallVector< ISD::InputArg, 32 > Ins
AtomicOrdering
Atomic ordering for LLVM&#39;s memory model.
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:695
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
unsigned getSizeInBits() const
unsigned getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
Context object for machine code objects.
Definition: MCContext.h:63
static const unsigned PerfectShuffleTable[6561+1]
ValTy * getCalledValue() const
Return the pointer to function that is being called.
Definition: CallSite.h:100
int64_t getSExtValue() const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:298
This is a fast-path instruction selection class that generates poor code and doesn&#39;t support illegal ...
Definition: FastISel.h:67
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
An SDNode for Power9 vector absolute value difference.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:478
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:398
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:478
SDValue getRegisterMask(const uint32_t *RegMask)
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table...
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:628
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
bool isELFv2ABI() const
bool has64BitSupport() const
has64BitSupport - Return true if the selected CPU supports 64-bit instructions, regardless of whether...
Definition: PPCSubtarget.h:209
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:402
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:121
bool hasP9Vector() const
Definition: PPCSubtarget.h:250
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be represented as an inde...
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:429
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1462
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
Class to represent array types.
Definition: DerivedTypes.h:369
This contains information for each constraint that we are lowering.
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a symbol with hidden visibility...
Definition: PPC.h:92
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself...
SmallVector< ISD::OutputArg, 32 > Outs
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:221
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:852
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments, on Darwin.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:959
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const override
getOptimalMemOpType - Returns the target specific optimal type for load and store operations as a res...
op_iterator op_begin() const
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:576
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:747
ArrayRef< SDUse > ops() const
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is 0.0 or -0.0.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VRGL* instruction with the ...
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, int64_t &Offset, SelectionDAG &DAG)
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:992
bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State)
const_iterator end() const
Definition: SmallSet.h:230
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool hasInvariantFunctionDescriptors() const
Definition: PPCSubtarget.h:267
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1020
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
Value * getOperand(unsigned i) const
Definition: User.h:170
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
unsigned getByValSize() const
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1252
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
This class is used to represent ISD::STORE nodes.
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:524
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
uint32_t FloatToBits(float Float)
This function takes a float and returns the bit equivalent 32-bit integer.
Definition: MathExtras.h:601
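A self-contained example of FloatToBits; the expected value is the standard IEEE-754 single-precision encoding of 1.0f:
#include "llvm/Support/MathExtras.h"
#include <cassert>

void floatToBitsExample() {
  // 1.0f is encoded as sign 0, exponent 127, mantissa 0 -> 0x3F800000.
  assert(llvm::FloatToBits(1.0f) == 0x3F800000u);
}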
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:118
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:327
TargetInstrInfo - Interface to description of machine instruction set.
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:636
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:229
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits...
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool getFunctionSections() const
Return true if functions should be emitted into their own section, corresponding to -ffunction-sectio...
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:147
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:610
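A self-contained example of MinAlign; it returns the largest power of two that divides both arguments, which is why adding a +12 offset to an 8-byte-aligned base leaves only 4-byte alignment:
#include "llvm/Support/MathExtras.h"
#include <cassert>

void minAlignExample() {
  assert(llvm::MinAlign(8, 12) == 4);   // 8-aligned base, +12 offset -> 4-aligned
  assert(llvm::MinAlign(16, 32) == 16); // both multiples of 16 -> 16-aligned
}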
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:750
static bool usePartialVectorLoads(SDNode *N)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd)...
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
const SDValue & getBasePtr() const
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
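A small, self-contained StringSwitch example in the style targets use for CPU or feature names; the helper and its mapping are hypothetical, not taken from this file:
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

// Hypothetical helper: map a -mcpu name to a boolean feature.
static bool cpuHasVSX(llvm::StringRef CPU) {
  return llvm::StringSwitch<bool>(CPU)
      .Case("pwr8", true)
      .Case("pwr9", true)
      .Default(false);
}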
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
Return a mask of call-preserved registers for the given calling convention on the current function...
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:166
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool hasFRES() const
Definition: PPCSubtarget.h:234
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:629
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:120
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
FSEL - Traditional three-operand fsel node.
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
Machine Value Type.
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
Simple binary floating point operators.
Definition: ISDOpcodes.h:283
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
bool hasFRSQRTES() const
Definition: PPCSubtarget.h:236
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:273
unsigned getScalarSizeInBits() const
This is an important base class in LLVM.
Definition: Constant.h:42
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:66
MO_NLP_FLAG - If this bit is set, the symbol reference is actually to the non_lazy_ptr for the global...
Definition: PPC.h:87
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE...
Definition: ISDOpcodes.h:728
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
SExtVElems, takes an input vector of a smaller type and sign extends to an output vector of a larger ...
VECINSERT - The PPC vector insert instruction.
Direct move from a VSX register to a GPR.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
const SDValue & getOperand(unsigned Num) const
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:934
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:332
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:232
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the specified isSplatShuffleMask...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
virtual unsigned getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
Definition: SelectionDAG.h:824
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
STFIWX - The STFIWX instruction.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target...
FCFID - The FCFID instruction, taking an f64 operand and producing and f64 value containing the FP re...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
bool isAcquireOrStronger(AtomicOrdering ao)
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
const SDValue & getOffset() const
static mvt_range fp_valuetypes()
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:179
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:434
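A self-contained sketch combining isPowerOf2_64 with countTrailingZeros, the usual way a shift amount is derived for the power-of-two division idiom mentioned elsewhere on this page; the helper itself is hypothetical:
#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Hypothetical helper: log2 of a power-of-two divisor, or -1 otherwise.
static int shiftAmountForDivisor(uint64_t Divisor) {
  if (llvm::isPowerOf2_64(Divisor))
    return static_cast<int>(llvm::countTrailingZeros(Divisor));
  return -1; // caller must fall back to a real divide
}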
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:161
Store scalar integers from VSR.
This class provides iterator support for SDUse operands that use a specific SDNode.
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, unsigned Alignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:758
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1193
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side...
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
void insertCopiesSplitCSR(MachineBasicBlock *Entry, const SmallVectorImpl< MachineBasicBlock *> &Exits) const override
Insert explicit copies in entry and exit blocks.
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
void setNoInfs(bool b)
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setVarArgsFrameIndex(int Index)
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:767
bool hasFPRND() const
Definition: PPCSubtarget.h:240
std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
Definition: ValueTypes.cpp:115
static const MCPhysReg QFPR[]
QFPR - The set of QPX registers that should be allocated for arguments.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline...
size_t arg_size() const
Definition: Function.h:698
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
void setPrefFunctionAlignment(unsigned Align)
Set the target's preferred function alignment.
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
static mvt_range vector_valuetypes()
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
arg_iterator arg_begin()
Definition: Function.h:671
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
Optional< StringRef > getSectionPrefix() const
Get the section prefix for this function.
Definition: Function.cpp:1420
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
self_iterator getIterator()
Definition: ilist_node.h:82
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
QVESPLATI = This corresponds to the QPX qvesplati instruction.
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:719
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
Common code between 32-bit and 64-bit PowerPC targets.
unsigned MaxStoresPerMemmove
Specify maximum bytes of store instructions per memmove call.
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo...
Definition: ISDOpcodes.h:796
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:556
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1...
std::vector< ArgListEntry > ArgListTy
Extended Value Type.
Definition: ValueTypes.h:34
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1415
void initializeSplitCSR(MachineBasicBlock *Entry) const override
Perform necessary initialization to handle a subset of CSRs explicitly via copies.
void setMinReservedArea(unsigned size)
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool isLittleEndian() const
Definition: PPCSubtarget.h:228
This structure contains all information that is necessary for lowering calls.
bool isPositionIndependent() const
size_t size() const
Definition: SmallVector.h:53
bool isVolatile() const
unsigned getDarwinDirective() const
getDarwinDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:171
const TargetMachine & getTargetMachine() const
StringRef getSection() const
Definition: Globals.cpp:161
This class contains a discriminated union of information about pointers in memory operands...
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
void setUseUnderscoreLongJmp(bool Val)
Indicate whether this target prefers to use _longjmp to implement llvm.longjmp or the version without...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
uint64_t getAlignment() const
Return the minimum known alignment in bytes of the actual memory reference.
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
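A minimal sketch of the getLoad signature above, reloading an i32 from a fixed stack slot; FI, DAG and dl are assumed to be in scope and the chain used here is only illustrative:
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), FIN,
                           MachinePointerInfo::getFixedStack(
                               DAG.getMachineFunction(), FI));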
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
const uint32_t * getNoPreservedMask() const override
SDValue CreateStackTemporary(EVT VT, unsigned minAlign=1)
Create a stack temporary, suitable for holding the specified value type.
bool hasP8Vector() const
Definition: PPCSubtarget.h:247
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
unsigned getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
The memory access writes data.
bool isReleaseOrStronger(AtomicOrdering ao)
Extract a subvector from unsigned integer vector and convert to FP.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type...
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:639
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:633
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
QBFLT = Access the underlying QPX floating-point boolean representation.
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef...
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate...
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:189
void addLiveInAttr(unsigned VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:404
Iterator for intrusive lists based on ilist_node.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs, bool HasQPX)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
CCState - This class holds information needed while lowering arguments and return values...
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override
Returns true if the target can instruction select the specified FP immediate natively.
void setNode(SDNode *N)
set the SDNode
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:339
bool isJumpTableRelative() const override
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:265
XXSPLT - The PPC VSX splat instructions.
bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State)
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
VECSHL - The PPC vector shift left instruction.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:213
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:222
IterTy arg_begin() const
Definition: CallSite.h:571
X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and rounds it to a floating point val...
Definition: ISDOpcodes.h:553
bool hasComdat() const
Definition: GlobalValue.h:226
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS model, produces an ADD instruction that ...
Module.h This file contains the declarations for the Module class.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:734
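A minimal sketch of getBuildVector in the BuildSplatI style, materializing a v4i32 splat of a small constant; DAG and dl are assumed to be in scope and the constant 5 is arbitrary:
SDValue Elt = DAG.getConstant(5, dl, MVT::i32);
SmallVector<SDValue, 4> Ops(4, Elt); // four copies of the same element
SDValue Splat = DAG.getBuildVector(MVT::v4i32, dl, Ops);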
Provides information about what library functions are available for the current target.
CCValAssign - Represent assignment of one arg/retval to a location.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:1044
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:96
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:730
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:644
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
bool isDarwinABI() const
Definition: PPCSubtarget.h:309
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:413
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildSplatI - Build a canonical splati of Val with an element size of SplatSize.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID...
CHAIN = SC CHAIN, Imm128 - System call.
This is an abstract virtual class for memory operations.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node. ...
const Constant * getConstVal() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if the target always benefits from combining into FMA for a given value type...
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isBLACompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
void setNoNaNs(bool b)
Represents one node in the SelectionDAG.
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:685
VPERM - The PPC VPERM Instruction.
int64_t getImm() const
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction...
bool hasP8Altivec() const
Definition: PPCSubtarget.h:248
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:679
const Function & getFunction() const
Return the LLVM function that this machine code represents.
STXSIX - The STXSI[bh]X instruction.
unsigned getMinReservedArea() const
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
static mvt_range integer_valuetypes()
The access may modify the value stored in memory.
i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after execu...
MachinePointerInfo getWithOffset(int64_t O) const
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
bool isDereferenceable() const
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
Class to represent vector types.
Definition: DerivedTypes.h:393
bool hasP9Altivec() const
Definition: PPCSubtarget.h:251
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT...
Definition: ValueTypes.h:73
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
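A sketch of the kind of constructor calls that use setIndexedLoadAction and setIndexedStoreAction to declare pre-increment forms legal; the exact set of types PPC registers is not reproduced here:
// Inside a TargetLowering constructor:
setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);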
EVT getMemoryVT() const
Return the type of the in-memory value.
Target - Wrapper for Target specific information.
Class for arbitrary precision integers.
Definition: APInt.h:70
unsigned getByValAlign() const
CodeModel::Model getCodeModel() const
Returns the code model.
bool hasRecipPrec() const
Definition: PPCSubtarget.h:237
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
QVGPCI = This corresponds to the QPX qvgpci instruction.
iterator_range< use_iterator > uses()
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate, that is the target has add instructions which can add a register and the immediate without having to materialize the immediate into a register.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:420
void setMinFunctionAlignment(unsigned Align)
Set the target's minimum function alignment (in log2(bytes))
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:464
static use_iterator use_end()
void setPrefLoopAlignment(unsigned Align)
Set the target's preferred loop alignment.
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:468
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:471
bool isTailCall() const
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2...
LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:70
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
bool use64BitRegs() const
use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit registers in 32-bit mode when...
Definition: PPCSubtarget.h:216
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:478
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:312
int getMaskElt(unsigned Idx) const
bool enableMachineScheduler() const override
bool hasComdat() const
Definition: GlobalObject.h:100
GPRC = address of GLOBAL_OFFSET_TABLE.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase, but as an MCExpr.
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca...
Definition: ISDOpcodes.h:859
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:135
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:638
Representation of each machine instruction.
Definition: MachineInstr.h:64
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the source.
Definition: ISDOpcodes.h:724
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack...
These are IR-level optimization flags that may be propagated to SDNodes.
Represents a use of a SDNode.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:673
SmallVector< SDValue, 32 > OutVals
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry...
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:387
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:705
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:537
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool isPosZero() const
Definition: APFloat.h:1158
unsigned getLocMemOffset() const
Reciprocal estimate instructions (unary FP ops).
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:206
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
static bool isFPExtLoad(SDValue Op)
unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, const TargetRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg...
bool hasFRSQRTE() const
Definition: PPCSubtarget.h:235
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:486
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
bool isInConsecutiveRegsLast() const
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1225
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:614
bool hasLazyResolverStub(const GlobalValue *GV) const
hasLazyResolverStub - Return true if accesses to the specified global have to go through a dyld lazy ...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
PointerUnion< const Value *, const PseudoSourceValue * > ptrVal
TargetOptions Options
Definition: TargetMachine.h:97
Establish a view to a call site for examination.
Definition: CallSite.h:711
bool hasLFIWAX() const
Definition: PPCSubtarget.h:239
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1181
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
static MachineOperand CreateImm(int64_t Val)
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
Direct move from a GPR to a VSX register (zero)
static bool callsShareTOCBase(const Function *Caller, SDValue Callee, const TargetMachine &TM)
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:403
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Flags getFlags() const
Return the raw flags of the source value,.
bool hasFCPSGN() const
Definition: PPCSubtarget.h:231
The CMPB instruction (takes two operands of i32 or i64).
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
The memory access always returns the same value (or traps).
unsigned MaxStoresPerMemmoveOptSize
Maximum number of store instructions that may be substituted for a call to memmove, used for functions with OptSize attribute.
bool hasAltivec() const
Definition: PPCSubtarget.h:242
unsigned MaxStoresPerMemcpyOptSize
Maximum number of store operations that may be substituted for a call to memcpy, used for functions w...
void setStackPointerRegisterToSaveRestore(unsigned R)
If set to a physical register, this specifies the register that llvm.stacksave/llvm.stackrestore should save and restore.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction. ...
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
block_iterator block_end() const
Definition: LoopInfo.h:155
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:733
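A self-contained example of SignExtend32; the template parameter names the bit width being treated as signed:
#include "llvm/Support/MathExtras.h"

// 0xFFFF as a 16-bit two's-complement value is -1; 0x7FFF stays positive.
static_assert(llvm::SignExtend32<16>(0xFFFF) == -1, "");
static_assert(llvm::SignExtend32<16>(0x7FFF) == 0x7FFF, "");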
static void setUsesTOCBasePtr(MachineFunction &MF)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1974
TC_RETURN - A tail call return.
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool useCRBits() const
useCRBits - Return true if we should store and manipulate i1 values in the individual condition regis...
Definition: PPCSubtarget.h:220
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
unsigned getOpcode() const
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:608
SDValue getValue(unsigned R) const
unsigned MaxStoresPerMemcpy
Specify maximum bytes of store instructions per memcpy call.
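A sketch of how a target constructor tunes these thresholds; the numbers below are purely illustrative and are not PPC's actual settings:
// Inside a TargetLowering constructor:
MaxStoresPerMemcpy = 8;        // stores allowed when expanding memcpy inline
MaxStoresPerMemcpyOptSize = 4; // tighter limit under -Os
MaxStoresPerMemset = 8;
MaxStoresPerMemsetOptSize = 4;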
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:346
bool isDarwin() const
isDarwin - True if this is any darwin platform.
Definition: PPCSubtarget.h:301
unsigned getNumRegisters(LLVMContext &Context, EVT VT) const
Return the number of registers that this ValueType will eventually require.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.setjmp intrinsic.
Definition: ISDOpcodes.h:108
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:749
static bool isConstantOrUndef(int Op, int Val)
isConstantOrUndef - Op is either an undef node or a ConstantSDNode.
static bool isFunctionGlobalAddress(SDValue Callee)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
bool isRegLoc() const
const MachinePointerInfo & getPointerInfo() const
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:345
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
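A minimal sketch pairing MaskedValueIsZero with APInt::getHighBitsSet, the usual way to prove a zero extension is redundant; Op and DAG are assumed to be in scope:
// If the top 48 bits of the 64-bit value are already known zero, Op
// already fits in 16 bits and an explicit zero extension adds nothing.
if (DAG.MaskedValueIsZero(Op, APInt::getHighBitsSet(64, 48))) {
  // ... safe to use Op directly ...
}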
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
XXREVERSE - The PPC VSX reverse instruction.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void insert(iterator MBBI, MachineBasicBlock *MBB)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
void setReturnAddressIsTaken(bool s)
bool hasFRE() const
Definition: PPCSubtarget.h:233
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
void setMinStackArgumentAlignment(unsigned Align)
Set the minimum stack alignment of an argument (in log2(bytes)).
Direct move of 2 consecutive GPRs to a VSX register.
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:296
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
ArrayRef< int > getMask() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
LLVM Value Representation.
Definition: Value.h:73
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:302
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
SDValue getRegister(unsigned Reg, EVT VT)
void setUseUnderscoreSetJmp(bool Val)
Indicate whether this target prefers to use _setjmp to implement llvm.setjmp or the version without _...
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2...
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Instruction * emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:962
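A one-line sketch of getSelectCC; LHS, RHS, TrueVal, FalseVal, DAG and dl are assumed to be in scope:
// Select TrueVal when LHS < RHS (signed), FalseVal otherwise, without
// building the intermediate SETCC node by hand.
SDValue Sel = DAG.getSelectCC(dl, LHS, RHS, TrueVal, FalseVal, ISD::SETLT);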
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
SDValue getValueType(EVT)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:81
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:776
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
bool isNonTemporal() const
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:331
These nodes represent PPC shifts.
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
PICLevel::Level getPICLevel() const
Returns the PIC level (small or large model)
Definition: Module.cpp:490
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:443
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
unsigned MaxStoresPerMemsetOptSize
Maximum number of store operations that may be substituted for a call to memset, used for functions with OptSize attribute.
static BranchProbability getZero()
bool isSVR4ABI() const
Definition: PPCSubtarget.h:310
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:198
unsigned getNumOperands() const
Conversion operators.
Definition: ISDOpcodes.h:465
Extract a subvector from signed integer vector and convert to FP.
const SDValue & getOperand(unsigned i) const
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:789
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
unsigned getLocReg() const
uint64_t getZExtValue() const
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:474
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:126
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
#define LLVM_DEBUG(X)
Definition: Debug.h:123
unsigned AllocateReg(unsigned Reg)
AllocateReg - Attempt to allocate one register.
unsigned getLiveInVirtReg(unsigned PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Definition: CallSite.h:271
On a symbol operand "FOO", this indicates that the reference is actually to "FOO@plt".
Definition: PPC.h:79
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:150
unsigned getVarArgsNumFPR() const
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
block_iterator block_begin() const
Definition: LoopInfo.h:154
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:584
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain...
unsigned AllocateStack(unsigned Size, unsigned Align)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the altivec VCMP*o instructions.
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to the "load atomic" instruction.
Definition: ISDOpcodes.h:785
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit integer.
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:131
const SDValue & getBasePtr() const
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase, but as an MCExpr.
LLVMContext * getContext() const
Definition: SelectionDAG.h:407
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate, and if so, returns its value in Imm.
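A sketch of the usual pattern: before folding an add into a reg+displacement (D-form) address, check that the offset really is a sign-extended 16-bit constant. checkDFormOffset is a hypothetical helper, not part of this file.

#include "PPCISelLowering.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <cstdint>
using namespace llvm;

static bool checkDFormOffset(SDValue Addr, int16_t &Disp) {
  // Only reg+imm candidates of the form (add base, cst) are interesting here.
  if (Addr.getOpcode() != ISD::ADD)
    return false;
  // isIntS16Immediate succeeds iff the operand is a constant that sign-extends
  // losslessly from 16 bits; it writes the value through the out-parameter.
  return isIntS16Immediate(Addr.getOperand(1).getNode(), Disp);
}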
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:375
static Constant * get(ArrayRef< Constant *> V)
Definition: Constants.cpp:1079
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase...
Definition: PPC.h:83
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
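A sketch of the common pattern in custom-inserter code: ask MachineRegisterInfo for a fresh virtual register of whatever class the instructions being built require. makeScratchGPR is a hypothetical helper, and PPC::GPRCRegClass is used only as an example class.

#include "PPCRegisterInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

static unsigned makeScratchGPR(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  // The returned vreg has no definition yet; the caller is expected to BuildMI
  // an instruction that defines it before any use.
  return MRI.createVirtualRegister(&PPC::GPRCRegClass);
}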
static bool hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS)
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
XXPERMDI - The PPC XXPERMDI instruction.
This file describes how to lower LLVM code to machine code.
const BasicBlock * getParent() const
Definition: Instruction.h:67
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:622
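A rough sketch of the classic Hi/Lo expansion these pieces combine into; buildHiLoAddress is hypothetical, and the real lowering in this file also has PIC, TLS and TOC-based paths.

#include "PPCISelLowering.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue buildHiLoAddress(SelectionDAG &DAG, const SDLoc &dl,
                                const GlobalValue *GV, EVT PtrVT) {
  // Target* nodes are opaque to generic DAG combines; the PPCISD::Hi/Lo
  // wrappers (described earlier) pick out the high and low 16-bit halves of
  // the symbol when the node is lowered to MC.
  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
  SDValue Zero = DAG.getConstant(0, dl, PtrVT);
  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGA, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, TGA, Zero);
  return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
}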
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate what to do about it.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:914
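Sketch of how a constructor advertises an indexed addressing mode, assuming a hypothetical TargetLowering subclass named PreIncExampleTLI; this file does something along these lines for the pre-increment forms PPC supports.

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"

namespace {
// Illustration only: mark pre-increment i32 loads/stores as directly
// supported; any MemIndexedMode/type pair left untouched defaults to Expand.
struct PreIncExampleTLI : llvm::TargetLowering {
  explicit PreIncExampleTLI(const llvm::TargetMachine &TM)
      : llvm::TargetLowering(TM) {
    setIndexedLoadAction(llvm::ISD::PRE_INC, llvm::MVT::i32, Legal);
    setIndexedStoreAction(llvm::ISD::PRE_INC, llvm::MVT::i32, Legal);
  }
};
} // end anonymous namespace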
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of type i[2*N], then return the top part.
Definition: ISDOpcodes.h:380
int isQVALIGNIShuffleMask(SDNode *N)
If this is a qvaligni shuffle mask, return the shift amount, otherwise return -1. ...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
This class is used to represent ISD::LOAD nodes.
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary...
Definition: ISDOpcodes.h:623
bool useSoftFloat() const override