1 //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the AArch64TargetLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64ISelLowering.h"
17 #include "AArch64PerfectShuffle.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/ArrayRef.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/ADT/StringRef.h"
29 #include "llvm/ADT/StringSwitch.h"
30 #include "llvm/ADT/Triple.h"
31 #include "llvm/ADT/Twine.h"
47 #include "llvm/IR/Attributes.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/DataLayout.h"
50 #include "llvm/IR/DebugLoc.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
54 #include "llvm/IR/GlobalValue.h"
55 #include "llvm/IR/IRBuilder.h"
56 #include "llvm/IR/Instruction.h"
57 #include "llvm/IR/Instructions.h"
58 #include "llvm/IR/Intrinsics.h"
59 #include "llvm/IR/Module.h"
60 #include "llvm/IR/OperandTraits.h"
61 #include "llvm/IR/Type.h"
62 #include "llvm/IR/Use.h"
63 #include "llvm/IR/Value.h"
64 #include "llvm/MC/MCRegisterInfo.h"
65 #include "llvm/Support/Casting.h"
66 #include "llvm/Support/CodeGen.h"
68 #include "llvm/Support/Compiler.h"
69 #include "llvm/Support/Debug.h"
71 #include "llvm/Support/KnownBits.h"
77 #include <algorithm>
78 #include <bitset>
79 #include <cassert>
80 #include <cctype>
81 #include <cstdint>
82 #include <cstdlib>
83 #include <iterator>
84 #include <limits>
85 #include <tuple>
86 #include <utility>
87 #include <vector>
88 
89 using namespace llvm;
90 
91 #define DEBUG_TYPE "aarch64-lower"
92 
93 STATISTIC(NumTailCalls, "Number of tail calls");
94 STATISTIC(NumShiftInserts, "Number of vector shift inserts");
95 STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
96 
97 static cl::opt<bool>
98 EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
99  cl::desc("Allow AArch64 SLI/SRI formation"),
100  cl::init(false));
101 
102 // FIXME: The necessary dtprel relocations don't seem to be supported
103 // well in the GNU bfd and gold linkers at the moment. Therefore, by
104 // default, for now, fall back to GeneralDynamic code generation.
106  "aarch64-elf-ldtls-generation", cl::Hidden,
107  cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
108  cl::init(false));
109 
110 static cl::opt<bool>
111 EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
112  cl::desc("Enable AArch64 logical imm instruction "
113  "optimization"),
114  cl::init(true));
115 
116 /// Value type used for condition codes.
117 static const MVT MVT_CC = MVT::i32;
118 
120  const AArch64Subtarget &STI)
121  : TargetLowering(TM), Subtarget(&STI) {
122  // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
123  // we have to make something up. Arbitrarily, choose ZeroOrOne.
125  // When comparing vectors, each element of the result vector is set to
126  // all-ones or all-zeros.
128 
129  // Set up the register classes.
130  addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
131  addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
132 
133  if (Subtarget->hasFPARMv8()) {
134  addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
135  addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
136  addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
137  addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
138  }
139 
140  if (Subtarget->hasNEON()) {
141  addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
142  addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
143  // Someone set us up the NEON.
144  addDRTypeForNEON(MVT::v2f32);
145  addDRTypeForNEON(MVT::v8i8);
146  addDRTypeForNEON(MVT::v4i16);
147  addDRTypeForNEON(MVT::v2i32);
148  addDRTypeForNEON(MVT::v1i64);
149  addDRTypeForNEON(MVT::v1f64);
150  addDRTypeForNEON(MVT::v4f16);
151 
152  addQRTypeForNEON(MVT::v4f32);
153  addQRTypeForNEON(MVT::v2f64);
154  addQRTypeForNEON(MVT::v16i8);
155  addQRTypeForNEON(MVT::v8i16);
156  addQRTypeForNEON(MVT::v4i32);
157  addQRTypeForNEON(MVT::v2i64);
158  addQRTypeForNEON(MVT::v8f16);
159  }
160 
161  // Compute derived properties from the register classes
163 
164  // Provide all sorts of operation actions
192 
196 
200 
202 
203  // Custom lowering hooks are needed for XOR
204  // to fold it into CSINC/CSINV.
207 
208  // Virtually no operation on f128 is legal, but LLVM can't expand them when
209  // there's a valid register class, so we need custom operations in most cases.
231 
232  // Lowering for many of the conversions is actually specified by the non-f128
233  // type. The LowerXXX function will be trivial when f128 isn't involved.
248 
249  // Variable arguments.
254 
255  // Variable-sized objects.
258 
259  if (Subtarget->isTargetWindows())
261  else
263 
264  // Constant pool entries
266 
267  // BlockAddress
269 
270  // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
279 
280  // AArch64 lacks both left-rotate and popcount instructions.
283  for (MVT VT : MVT::vector_valuetypes()) {
286  }
287 
288  // AArch64 doesn't have {U|S}MUL_LOHI.
291 
294 
297  for (MVT VT : MVT::vector_valuetypes()) {
300  }
307 
308  // Custom lower Add/Sub/Mul with overflow.
321 
330  if (Subtarget->hasFullFP16())
332  else
334 
368 
369  if (!Subtarget->hasFullFP16()) {
392 
393  // promote v4f16 to v4f32 when that is known to be safe.
406 
422 
443  }
444 
445  // AArch64 has implementations of a lot of rounding-like FP operations.
446  for (MVT Ty : {MVT::f32, MVT::f64}) {
457  }
458 
459  if (Subtarget->hasFullFP16()) {
470  }
471 
473 
475 
481 
482  // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
483  // This requires the Performance Monitors extension.
484  if (Subtarget->hasPerfMon())
486 
487  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
488  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
489  // Issue __sincos_stret if available.
492  } else {
495  }
496 
497  // Make floating-point constants legal for the large code model, so they don't
498  // become loads from the constant pool.
499  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
502  }
503 
504  // AArch64 does not have floating-point extending loads, i1 sign-extending
505  // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
506  for (MVT VT : MVT::fp_valuetypes()) {
511  }
512  for (MVT VT : MVT::integer_valuetypes())
514 
522 
525 
526  // Indexed loads and stores are supported.
527  for (unsigned im = (unsigned)ISD::PRE_INC;
543  }
544 
545  // Trap.
547 
548  // We combine OR nodes for bitfield operations.
550 
551  // Vector add and sub nodes may conceal a high-half opportunity.
552  // Also, try to fold ADD into CSINC/CSINV.
559 
563 
565 
572  if (Subtarget->supportsAddressTopByteIgnored())
574 
576 
579 
583 
585 
586  // In case of strict alignment, avoid an excessive number of byte wide stores.
590 
595 
597 
599 
601 
602  EnableExtLdPromotion = true;
603 
604  // Set required alignment.
606  // Set preferred alignments.
609 
610  // Only change the limit for entries in a jump table if specified by
611  // the subtarget, but not at the command line.
612  unsigned MaxJT = STI.getMaximumJumpTableSize();
613  if (MaxJT && getMaximumJumpTableSize() == 0)
615 
616  setHasExtractBitsInsn(true);
617 
619 
620  if (Subtarget->hasNEON()) {
621  // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
622  // silliness like this:
648 
654 
656 
657  // AArch64 doesn't have direct vector->f32 conversion instructions for
658  // elements smaller than i32, so promote the input to i32 first.
663  // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
664  // -> v8f16 conversions.
669  // Similarly, there is no direct i32 -> f64 vector conversion instruction.
674  // Or, direct i32 -> f16 vector conversion. Set it to custom, so the
675  // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
678 
681 
690 
691  // AArch64 doesn't have MUL.2d:
693  // Custom handling for some quad-vector types to detect MULL.
697 
698  // Vector reductions
699  for (MVT VT : MVT::integer_valuetypes()) {
705  }
706  for (MVT VT : MVT::fp_valuetypes()) {
709  }
710 
713  // Likewise, narrowing and extending vector loads/stores aren't handled
714  // directly.
715  for (MVT VT : MVT::vector_valuetypes()) {
717 
718  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
721  } else {
724  }
727 
729 
730  for (MVT InnerVT : MVT::vector_valuetypes()) {
731  setTruncStoreAction(VT, InnerVT, Expand);
732  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
733  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
734  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
735  }
736  }
737 
738  // AArch64 has implementations of a lot of rounding-like FP operations.
739  for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
746  }
747 
749  }
750 
752 }
753 
754 void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
755  assert(VT.isVector() && "VT should be a vector type");
756 
757  if (VT.isFloatingPoint()) {
759  setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
760  setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
761  }
762 
763  // Mark vector float intrinsics as expand.
764  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
773 
774  // But we do support custom-lowering for FCOPYSIGN.
776  }
777 
790 
794  for (MVT InnerVT : MVT::all_valuetypes())
795  setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
796 
797  // CNT supports only B element sizes; use UADDLP to widen for larger elements.
798  if (VT != MVT::v8i8 && VT != MVT::v16i8)
800 
806 
809 
810  if (!VT.isFloatingPoint())
812 
813  // [SU][MIN|MAX] are available for all NEON types apart from i64.
814  if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
815  for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
816  setOperationAction(Opcode, VT, Legal);
817 
818  // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
819  if (VT.isFloatingPoint() &&
820  (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
821  for (unsigned Opcode :
823  setOperationAction(Opcode, VT, Legal);
824 
825  if (Subtarget->isLittleEndian()) {
826  for (unsigned im = (unsigned)ISD::PRE_INC;
830  }
831  }
832 }
833 
834 void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
835  addRegisterClass(VT, &AArch64::FPR64RegClass);
836  addTypeForNEON(VT, MVT::v2i32);
837 }
838 
839 void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
840  addRegisterClass(VT, &AArch64::FPR128RegClass);
841  addTypeForNEON(VT, MVT::v4i32);
842 }
843 
845  EVT VT) const {
846  if (!VT.isVector())
847  return MVT::i32;
849 }
850 
851 static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
852  const APInt &Demanded,
854  unsigned NewOpc) {
855  uint64_t OldImm = Imm, NewImm, Enc;
856  uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
857 
858  // Return if the immediate is already all zeros, all ones, a bimm32 or a
859  // bimm64.
860  if (Imm == 0 || Imm == Mask ||
861  AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
862  return false;
863 
864  unsigned EltSize = Size;
865  uint64_t DemandedBits = Demanded.getZExtValue();
866 
867  // Clear bits that are not demanded.
868  Imm &= DemandedBits;
869 
870  while (true) {
871  // The goal here is to set the non-demanded bits in a way that minimizes
872  // the number of switching between 0 and 1. In order to achieve this goal,
873  // we set the non-demanded bits to the value of the preceding demanded bits.
874  // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
875  // non-demanded bit), we copy bit0 (1) to the least significant 'x',
876  // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
877  // The final result is 0b11000011.
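  // For illustration only (the function itself only sees 32- and 64-bit
  // values), tracing that example with an 8-bit element through the code
  // below gives:
  //   DemandedBits    = 0b01100101, Imm (demanded bits only) = 0b01000001
  //   NonDemandedBits = 0b10011010
  //   InvertedImm     = 0b00100100
  //   RotatedImm      = 0b00001000
  //   Sum             = 0b10100010, Carry = 0
  //   Ones            = 0b10000010
  //   NewImm          = 0b11000011, matching the result quoted above.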
878  uint64_t NonDemandedBits = ~DemandedBits;
879  uint64_t InvertedImm = ~Imm & DemandedBits;
880  uint64_t RotatedImm =
881  ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
882  NonDemandedBits;
883  uint64_t Sum = RotatedImm + NonDemandedBits;
884  bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
885  uint64_t Ones = (Sum + Carry) & NonDemandedBits;
886  NewImm = (Imm | Ones) & Mask;
887 
888  // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
889  // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
890  // we halve the element size and continue the search.
891  if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
892  break;
893 
894  // We cannot shrink the element size any further if it is 2-bits.
895  if (EltSize == 2)
896  return false;
897 
898  EltSize /= 2;
899  Mask >>= EltSize;
900  uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
901 
902  // Return if there is a mismatch in any of the demanded bits of Imm and Hi.
903  if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
904  return false;
905 
906  // Merge the upper and lower halves of Imm and DemandedBits.
907  Imm |= Hi;
908  DemandedBits |= DemandedBitsHi;
909  }
910 
911  ++NumOptimizedImms;
912 
913  // Replicate the element across the register width.
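  // For example (illustration only): with Size == 32, an 8-bit pattern of
  // 0b11000011 is replicated by this loop to 0xC3C3C3C3.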
914  while (EltSize < Size) {
915  NewImm |= NewImm << EltSize;
916  EltSize *= 2;
917  }
918 
919  (void)OldImm;
920  assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
921  "demanded bits should never be altered");
922  assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
923 
924  // Create the new constant immediate node.
925  EVT VT = Op.getValueType();
926  SDLoc DL(Op);
927  SDValue New;
928 
929  // If the new constant immediate is all-zeros or all-ones, let the target
930  // independent DAG combine optimize this node.
931  if (NewImm == 0 || NewImm == OrigMask) {
932  New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
933  TLO.DAG.getConstant(NewImm, DL, VT));
934  // Otherwise, create a machine node so that target independent DAG combine
935  // doesn't undo this optimization.
936  } else {
937  Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
938  SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
939  New = SDValue(
940  TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
941  }
942 
943  return TLO.CombineTo(Op, New);
944 }
945 
947  SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
948  // Delay this optimization to as late as possible.
949  if (!TLO.LegalOps)
950  return false;
951 
953  return false;
954 
955  EVT VT = Op.getValueType();
956  if (VT.isVector())
957  return false;
958 
959  unsigned Size = VT.getSizeInBits();
960  assert((Size == 32 || Size == 64) &&
961  "i32 or i64 is expected after legalization.");
962 
963  // Exit early if we demand all bits.
964  if (Demanded.countPopulation() == Size)
965  return false;
966 
967  unsigned NewOpc;
968  switch (Op.getOpcode()) {
969  default:
970  return false;
971  case ISD::AND:
972  NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
973  break;
974  case ISD::OR:
975  NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
976  break;
977  case ISD::XOR:
978  NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
979  break;
980  }
982  if (!C)
983  return false;
984  uint64_t Imm = C->getZExtValue();
985  return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
986 }
987 
988 /// computeKnownBitsForTargetNode - Determine which of the bits specified in
989 /// Mask are known to be either zero or one and return them Known.
991  const SDValue Op, KnownBits &Known,
992  const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
993  switch (Op.getOpcode()) {
994  default:
995  break;
996  case AArch64ISD::CSEL: {
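  // CSEL picks one of its first two operands, so a bit is known zero/one
  // only when it is known to have that same value in both operands.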
997  KnownBits Known2;
998  Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
999  Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1000  Known.Zero &= Known2.Zero;
1001  Known.One &= Known2.One;
1002  break;
1003  }
1004  case ISD::INTRINSIC_W_CHAIN: {
1005  ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1006  Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1007  switch (IntID) {
1008  default: return;
1010  case Intrinsic::aarch64_ldxr: {
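  // An LDXR of a memory type narrower than the result register zero-extends,
  // so every bit above the loaded width is known to be zero.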
1011  unsigned BitWidth = Known.getBitWidth();
1012  EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1013  unsigned MemBits = VT.getScalarSizeInBits();
1014  Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1015  return;
1016  }
1017  }
1018  break;
1019  }
1021  case ISD::INTRINSIC_VOID: {
1022  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1023  switch (IntNo) {
1024  default:
1025  break;
1028  // Figure out the datatype of the vector operand. The UMINV instruction
1029  // will zero extend the result, so we can mark as known zero all the
1030  // bits larger than the element datatype. 32-bit or larger doesn't need
1031  // this as those are legal types and will be handled by isel directly.
1032  MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1033  unsigned BitWidth = Known.getBitWidth();
1034  if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1035  assert(BitWidth >= 8 && "Unexpected width!");
1036  APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1037  Known.Zero |= Mask;
1038  } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1039  assert(BitWidth >= 16 && "Unexpected width!");
1040  APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1041  Known.Zero |= Mask;
1042  }
1043  break;
1044  } break;
1045  }
1046  }
1047  }
1048 }
1049 
1051  EVT) const {
1052  return MVT::i64;
1053 }
1054 
1056  unsigned AddrSpace,
1057  unsigned Align,
1058  bool *Fast) const {
1059  if (Subtarget->requiresStrictAlign())
1060  return false;
1061 
1062  if (Fast) {
1063  // Some CPUs are fine with unaligned stores except for 128-bit ones.
1064  *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1065  // See comments in performSTORECombine() for more details about
1066  // these conditions.
1067 
1068  // Code that uses clang vector extensions can mark that it
1069  // wants unaligned accesses to be treated as fast by
1070  // underspecifying alignment to be 1 or 2.
1071  Align <= 2 ||
1072 
1073  // Disregard v2i64. Memcpy lowering produces those and splitting
1074  // them regresses performance on micro-benchmarks and olden/bh.
1075  VT == MVT::v2i64;
1076  }
1077  return true;
1078 }
1079 
1080 FastISel *
1082  const TargetLibraryInfo *libInfo) const {
1083  return AArch64::createFastISel(funcInfo, libInfo);
1084 }
1085 
1086 const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1087  switch ((AArch64ISD::NodeType)Opcode) {
1088  case AArch64ISD::FIRST_NUMBER: break;
1089  case AArch64ISD::CALL: return "AArch64ISD::CALL";
1090  case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
1091  case AArch64ISD::ADR: return "AArch64ISD::ADR";
1092  case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
1093  case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
1094  case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
1095  case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
1096  case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
1097  case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
1098  case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
1099  case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
1100  case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
1101  case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
1102  case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
1103  case AArch64ISD::ADC: return "AArch64ISD::ADC";
1104  case AArch64ISD::SBC: return "AArch64ISD::SBC";
1105  case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
1106  case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
1107  case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
1108  case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
1109  case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
1110  case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
1111  case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
1112  case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
1113  case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
1114  case AArch64ISD::DUP: return "AArch64ISD::DUP";
1115  case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
1116  case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
1117  case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
1118  case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
1119  case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
1120  case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
1121  case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
1122  case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
1123  case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
1124  case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
1125  case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
1126  case AArch64ISD::BICi: return "AArch64ISD::BICi";
1127  case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
1128  case AArch64ISD::BSL: return "AArch64ISD::BSL";
1129  case AArch64ISD::NEG: return "AArch64ISD::NEG";
1130  case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
1131  case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
1132  case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
1133  case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
1134  case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
1135  case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
1136  case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
1137  case AArch64ISD::REV16: return "AArch64ISD::REV16";
1138  case AArch64ISD::REV32: return "AArch64ISD::REV32";
1139  case AArch64ISD::REV64: return "AArch64ISD::REV64";
1140  case AArch64ISD::EXT: return "AArch64ISD::EXT";
1141  case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
1142  case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
1143  case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
1144  case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
1145  case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
1146  case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
1147  case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
1148  case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
1149  case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
1150  case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
1151  case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
1152  case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
1153  case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
1154  case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
1155  case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
1156  case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
1157  case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
1158  case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
1159  case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
1160  case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
1161  case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
1162  case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
1163  case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
1164  case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
1165  case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
1166  case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
1167  case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
1168  case AArch64ISD::NOT: return "AArch64ISD::NOT";
1169  case AArch64ISD::BIT: return "AArch64ISD::BIT";
1170  case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
1171  case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
1172  case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
1173  case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
1174  case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
1175  case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
1176  case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
1177  case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
1178  case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
1179  case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
1180  case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
1181  case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
1182  case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
1183  case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
1184  case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
1185  case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
1186  case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
1187  case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
1188  case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
1189  case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
1190  case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
1191  case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
1192  case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
1193  case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
1194  case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
1195  case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
1196  case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
1197  case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
1198  case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
1199  case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
1200  case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
1201  case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
1202  case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
1203  case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
1204  case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
1205  case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
1206  case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
1207  case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
1208  case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
1209  case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
1210  case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
1211  case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
1212  case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
1213  case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
1214  }
1215  return nullptr;
1216 }
1217 
1220  MachineBasicBlock *MBB) const {
1221  // We materialise the F128CSEL pseudo-instruction as some control flow and a
1222  // phi node:
1223 
1224  // OrigBB:
1225  // [... previous instrs leading to comparison ...]
1226  // b.ne TrueBB
1227  // b EndBB
1228  // TrueBB:
1229  // ; Fallthrough
1230  // EndBB:
1231  // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
1232 
1233  MachineFunction *MF = MBB->getParent();
1234  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1235  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
1236  DebugLoc DL = MI.getDebugLoc();
1237  MachineFunction::iterator It = ++MBB->getIterator();
1238 
1239  unsigned DestReg = MI.getOperand(0).getReg();
1240  unsigned IfTrueReg = MI.getOperand(1).getReg();
1241  unsigned IfFalseReg = MI.getOperand(2).getReg();
1242  unsigned CondCode = MI.getOperand(3).getImm();
1243  bool NZCVKilled = MI.getOperand(4).isKill();
1244 
1245  MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
1246  MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
1247  MF->insert(It, TrueBB);
1248  MF->insert(It, EndBB);
1249 
1250  // Transfer rest of current basic-block to EndBB
1251  EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
1252  MBB->end());
1253  EndBB->transferSuccessorsAndUpdatePHIs(MBB);
1254 
1255  BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
1256  BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1257  MBB->addSuccessor(TrueBB);
1258  MBB->addSuccessor(EndBB);
1259 
1260  // TrueBB falls through to the end.
1261  TrueBB->addSuccessor(EndBB);
1262 
1263  if (!NZCVKilled) {
1264  TrueBB->addLiveIn(AArch64::NZCV);
1265  EndBB->addLiveIn(AArch64::NZCV);
1266  }
1267 
1268  BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
1269  .addReg(IfTrueReg)
1270  .addMBB(TrueBB)
1271  .addReg(IfFalseReg)
1272  .addMBB(MBB);
1273 
1274  MI.eraseFromParent();
1275  return EndBB;
1276 }
1277 
1279  MachineInstr &MI, MachineBasicBlock *BB) const {
1281  BB->getParent()->getFunction().getPersonalityFn())) &&
1282  "SEH does not use catchret!");
1283  return BB;
1284 }
1285 
1287  MachineInstr &MI, MachineBasicBlock *BB) const {
1288  MI.eraseFromParent();
1289  return BB;
1290 }
1291 
1293  MachineInstr &MI, MachineBasicBlock *BB) const {
1294  switch (MI.getOpcode()) {
1295  default:
1296 #ifndef NDEBUG
1297  MI.dump();
1298 #endif
1299  llvm_unreachable("Unexpected instruction for custom inserter!");
1300 
1301  case AArch64::F128CSEL:
1302  return EmitF128CSEL(MI, BB);
1303 
1304  case TargetOpcode::STACKMAP:
1305  case TargetOpcode::PATCHPOINT:
1306  return emitPatchPoint(MI, BB);
1307 
1308  case AArch64::CATCHRET:
1309  return EmitLoweredCatchRet(MI, BB);
1310  case AArch64::CATCHPAD:
1311  return EmitLoweredCatchPad(MI, BB);
1312  }
1313 }
1314 
1315 //===----------------------------------------------------------------------===//
1316 // AArch64 Lowering private implementation.
1317 //===----------------------------------------------------------------------===//
1318 
1319 //===----------------------------------------------------------------------===//
1320 // Lowering Code
1321 //===----------------------------------------------------------------------===//
1322 
1323 /// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
1324 /// CC
1326  switch (CC) {
1327  default:
1328  llvm_unreachable("Unknown condition code!");
1329  case ISD::SETNE:
1330  return AArch64CC::NE;
1331  case ISD::SETEQ:
1332  return AArch64CC::EQ;
1333  case ISD::SETGT:
1334  return AArch64CC::GT;
1335  case ISD::SETGE:
1336  return AArch64CC::GE;
1337  case ISD::SETLT:
1338  return AArch64CC::LT;
1339  case ISD::SETLE:
1340  return AArch64CC::LE;
1341  case ISD::SETUGT:
1342  return AArch64CC::HI;
1343  case ISD::SETUGE:
1344  return AArch64CC::HS;
1345  case ISD::SETULT:
1346  return AArch64CC::LO;
1347  case ISD::SETULE:
1348  return AArch64CC::LS;
1349  }
1350 }
1351 
1352 /// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
1355  AArch64CC::CondCode &CondCode2) {
1356  CondCode2 = AArch64CC::AL;
1357  switch (CC) {
1358  default:
1359  llvm_unreachable("Unknown FP condition!");
1360  case ISD::SETEQ:
1361  case ISD::SETOEQ:
1362  CondCode = AArch64CC::EQ;
1363  break;
1364  case ISD::SETGT:
1365  case ISD::SETOGT:
1366  CondCode = AArch64CC::GT;
1367  break;
1368  case ISD::SETGE:
1369  case ISD::SETOGE:
1370  CondCode = AArch64CC::GE;
1371  break;
1372  case ISD::SETOLT:
1373  CondCode = AArch64CC::MI;
1374  break;
1375  case ISD::SETOLE:
1376  CondCode = AArch64CC::LS;
1377  break;
1378  case ISD::SETONE:
1379  CondCode = AArch64CC::MI;
1380  CondCode2 = AArch64CC::GT;
1381  break;
1382  case ISD::SETO:
1383  CondCode = AArch64CC::VC;
1384  break;
1385  case ISD::SETUO:
1386  CondCode = AArch64CC::VS;
1387  break;
1388  case ISD::SETUEQ:
1389  CondCode = AArch64CC::EQ;
1390  CondCode2 = AArch64CC::VS;
1391  break;
1392  case ISD::SETUGT:
1393  CondCode = AArch64CC::HI;
1394  break;
1395  case ISD::SETUGE:
1396  CondCode = AArch64CC::PL;
1397  break;
1398  case ISD::SETLT:
1399  case ISD::SETULT:
1400  CondCode = AArch64CC::LT;
1401  break;
1402  case ISD::SETLE:
1403  case ISD::SETULE:
1404  CondCode = AArch64CC::LE;
1405  break;
1406  case ISD::SETNE:
1407  case ISD::SETUNE:
1408  CondCode = AArch64CC::NE;
1409  break;
1410  }
1411 }
1412 
1413 /// Convert a DAG fp condition code to an AArch64 CC.
1414 /// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1415 /// should be AND'ed instead of OR'ed.
1418  AArch64CC::CondCode &CondCode2) {
1419  CondCode2 = AArch64CC::AL;
1420  switch (CC) {
1421  default:
1422  changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1423  assert(CondCode2 == AArch64CC::AL);
1424  break;
1425  case ISD::SETONE:
1426  // (a one b)
1427  // == ((a olt b) || (a ogt b))
1428  // == ((a ord b) && (a une b))
1429  CondCode = AArch64CC::VC;
1430  CondCode2 = AArch64CC::NE;
1431  break;
1432  case ISD::SETUEQ:
1433  // (a ueq b)
1434  // == ((a uno b) || (a oeq b))
1435  // == ((a ule b) && (a uge b))
1436  CondCode = AArch64CC::PL;
1437  CondCode2 = AArch64CC::LE;
1438  break;
1439  }
1440 }
1441 
1442 /// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
1443 /// CC usable with the vector instructions. Fewer operations are available
1444 /// without a real NZCV register, so we have to use less efficient combinations
1445 /// to get the same effect.
1448  AArch64CC::CondCode &CondCode2,
1449  bool &Invert) {
1450  Invert = false;
1451  switch (CC) {
1452  default:
1453  // Mostly the scalar mappings work fine.
1454  changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1455  break;
1456  case ISD::SETUO:
1457  Invert = true;
1459  case ISD::SETO:
1460  CondCode = AArch64CC::MI;
1461  CondCode2 = AArch64CC::GE;
1462  break;
1463  case ISD::SETUEQ:
1464  case ISD::SETULT:
1465  case ISD::SETULE:
1466  case ISD::SETUGT:
1467  case ISD::SETUGE:
1468  // All of the compare-mask comparisons are ordered, but we can switch
1469  // between the two by a double inversion. E.g. ULE == !OGT.
1470  Invert = true;
1471  changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2);
1472  break;
1473  }
1474 }
1475 
1476 static bool isLegalArithImmed(uint64_t C) {
1477  // Matches AArch64DAGToDAGISel::SelectArithImmed().
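  // For example (illustration only): 0xFFF and 0xFFF000 are legal (a 12-bit
  // value, optionally shifted left by 12), whereas 0x1001 is not.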
1478  bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
1479  LLVM_DEBUG(dbgs() << "Is imm " << C
1480  << " legal: " << (IsLegal ? "yes\n" : "no\n"));
1481  return IsLegal;
1482 }
1483 
1484 // Can a (CMP op1, (sub 0, op2) be turned into a CMN instruction on
1485 // the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
1486 // can be set differently by this operation. It comes down to whether
1487 // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
1488 // everything is fine. If not then the optimization is wrong. Thus general
1489 // comparisons are only valid if op2 != 0.
1490 //
1491 // So, finally, the only LLVM-native comparisons that don't mention C and V
1492 // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
1493 // the absence of information about op2.
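// For example (illustration only): (setcc eq, x, (sub 0, y)) can be emitted
// as "ADDS x, y" (i.e. a CMN), because equality depends only on the Z flag.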
1494 static bool isCMN(SDValue Op, ISD::CondCode CC) {
1495  return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
1496  (CC == ISD::SETEQ || CC == ISD::SETNE);
1497 }
1498 
1500  const SDLoc &dl, SelectionDAG &DAG) {
1501  EVT VT = LHS.getValueType();
1502  const bool FullFP16 =
1503  static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1504 
1505  if (VT.isFloatingPoint()) {
1506  assert(VT != MVT::f128);
1507  if (VT == MVT::f16 && !FullFP16) {
1508  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
1509  RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
1510  VT = MVT::f32;
1511  }
1512  return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
1513  }
1514 
1515  // The CMP instruction is just an alias for SUBS, and representing it as
1516  // SUBS means that it's possible to get CSE with subtract operations.
1517  // A later phase can perform the optimization of setting the destination
1518  // register to WZR/XZR if it ends up being unused.
1519  unsigned Opcode = AArch64ISD::SUBS;
1520 
1521  if (isCMN(RHS, CC)) {
1522  // Can we combine a (CMP op1, (sub 0, op2) into a CMN instruction ?
1523  Opcode = AArch64ISD::ADDS;
1524  RHS = RHS.getOperand(1);
1525  } else if (isCMN(LHS, CC)) {
1526  // As we are looking for EQ/NE compares, the operands can be commuted; can
1527  // we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
1528  Opcode = AArch64ISD::ADDS;
1529  LHS = LHS.getOperand(1);
1530  } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
1531  !isUnsignedIntSetCC(CC)) {
1532  // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
1533  // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
1534  // of the signed comparisons.
1535  Opcode = AArch64ISD::ANDS;
1536  RHS = LHS.getOperand(1);
1537  LHS = LHS.getOperand(0);
1538  }
1539 
1540  return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
1541  .getValue(1);
1542 }
1543 
1544 /// \defgroup AArch64CCMP CMP;CCMP matching
1545 ///
1546 /// These functions deal with the formation of CMP;CCMP;... sequences.
1547 /// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
1548 /// a comparison. They set the NZCV flags to a predefined value if their
1549 /// predicate is false. This allows expressing arbitrary conjunctions, for
1550 /// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
1551 /// expressed as:
1552 /// cmp A
1553 /// ccmp B, inv(CB), CA
1554 /// check for CB flags
1555 ///
1556 /// This naturally lets us implement chains of AND operations with SETCC
1557 /// operands. And we can even implement some other situations by transforming
1558 /// them:
1559 /// - We can implement (NEG SETCC) i.e. negating a single comparison by
1560 /// negating the flags used in a CCMP/FCCMP operations.
1561 /// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
1562 /// by negating the flags we test for afterwards. i.e.
1563 /// NEG (CMP CCMP CCMP ...) can be implemented.
1564 /// - Note that we can only ever negate all previously processed results.
1565 /// What we can not implement by flipping the flags to test is a negation
1566 /// of two sub-trees (because the negation affects all sub-trees emitted so
1567 /// far, so the 2nd sub-tree we emit would also affect the first).
1568 /// With those tools we can implement some OR operations:
1569 /// - (OR (SETCC A) (SETCC B)) can be implemented via:
1570 /// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
1571 /// - After transforming OR to NEG/AND combinations we may be able to use NEG
1572 /// elimination rules from earlier to implement the whole thing as a
1573 /// CCMP/FCCMP chain.
1574 ///
1575 /// As complete example:
1576 /// or (or (setCA (cmp A)) (setCB (cmp B)))
1577 /// (and (setCC (cmp C)) (setCD (cmp D)))"
1578 /// can be reassociated to:
1579 /// or (and (setCC (cmp C)) setCD (cmp D))
1580 /// (or (setCA (cmp A)) (setCB (cmp B)))
1581 /// can be transformed to:
1582 /// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
1583 /// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
1584 /// which can be implemented as:
1585 /// cmp C
1586 /// ccmp D, inv(CD), CC
1587 /// ccmp A, CA, inv(CD)
1588 /// ccmp B, CB, inv(CA)
1589 /// check for CB flags
1590 ///
1591 /// A counterexample is "or (and A B) (and C D)" which translates to
1592 /// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))), we
1593 /// can only implement 1 of the inner (not) operations, but not both!
1594 /// @{
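//
// As a concrete, purely illustrative example, a source-level condition such
// as "a == 0 && b > 5" (with 'a' in w0 and 'b' in w1) is typically lowered
// to a flag-setting chain along the lines of:
//   cmp  w0, #0
//   ccmp w1, #5, #4, eq   // if 'eq' holds, compare b with 5; otherwise set
//                         // NZCV to 0b0100 (Z=1) so the final test fails
//   cset w0, gt
// The exact registers and flag immediates depend on the surrounding code.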
1595 
1596 /// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
1598  ISD::CondCode CC, SDValue CCOp,
1600  AArch64CC::CondCode OutCC,
1601  const SDLoc &DL, SelectionDAG &DAG) {
1602  unsigned Opcode = 0;
1603  const bool FullFP16 =
1604  static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1605 
1606  if (LHS.getValueType().isFloatingPoint()) {
1607  assert(LHS.getValueType() != MVT::f128);
1608  if (LHS.getValueType() == MVT::f16 && !FullFP16) {
1609  LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
1610  RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
1611  }
1612  Opcode = AArch64ISD::FCCMP;
1613  } else if (RHS.getOpcode() == ISD::SUB) {
1614  SDValue SubOp0 = RHS.getOperand(0);
1615  if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1616  // See emitComparison() on why we can only do this for SETEQ and SETNE.
1617  Opcode = AArch64ISD::CCMN;
1618  RHS = RHS.getOperand(1);
1619  }
1620  }
1621  if (Opcode == 0)
1622  Opcode = AArch64ISD::CCMP;
1623 
1624  SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
1626  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
1627  SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
1628  return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
1629 }
1630 
1631 /// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
1632 /// expressed as a conjunction. See \ref AArch64CCMP.
1633 /// \param CanNegate Set to true if we can negate the whole sub-tree just by
1634 /// changing the conditions on the SETCC tests.
1635 /// (this means we can call emitConjunctionRec() with
1636 /// Negate==true on this sub-tree)
1637 /// \param MustBeFirst Set to true if this subtree needs to be negated and we
1638 /// cannot do the negation naturally. We are required to
1639 /// emit the subtree first in this case.
1640 /// \param WillNegate Is true if we are called when the result of this
1641 /// subexpression must be negated. This happens when the
1642 /// outer expression is an OR. We can use this fact to know
1643 /// that we have a double negation (or (or ...) ...) that
1644 /// can be implemented for free.
1645 static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
1646  bool &MustBeFirst, bool WillNegate,
1647  unsigned Depth = 0) {
1648  if (!Val.hasOneUse())
1649  return false;
1650  unsigned Opcode = Val->getOpcode();
1651  if (Opcode == ISD::SETCC) {
1652  if (Val->getOperand(0).getValueType() == MVT::f128)
1653  return false;
1654  CanNegate = true;
1655  MustBeFirst = false;
1656  return true;
1657  }
1658  // Protect against exponential runtime and stack overflow.
1659  if (Depth > 6)
1660  return false;
1661  if (Opcode == ISD::AND || Opcode == ISD::OR) {
1662  bool IsOR = Opcode == ISD::OR;
1663  SDValue O0 = Val->getOperand(0);
1664  SDValue O1 = Val->getOperand(1);
1665  bool CanNegateL;
1666  bool MustBeFirstL;
1667  if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
1668  return false;
1669  bool CanNegateR;
1670  bool MustBeFirstR;
1671  if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
1672  return false;
1673 
1674  if (MustBeFirstL && MustBeFirstR)
1675  return false;
1676 
1677  if (IsOR) {
1678  // For an OR expression we need to be able to naturally negate at least
1679  // one side or we cannot do the transformation at all.
1680  if (!CanNegateL && !CanNegateR)
1681  return false;
1682  // If the result of the OR will be negated and we can naturally negate
1683  // the leaves, then this sub-tree as a whole negates naturally.
1684  CanNegate = WillNegate && CanNegateL && CanNegateR;
1685  // If we cannot naturally negate the whole sub-tree, then this must be
1686  // emitted first.
1687  MustBeFirst = !CanNegate;
1688  } else {
1689  assert(Opcode == ISD::AND && "Must be OR or AND");
1690  // We cannot naturally negate an AND operation.
1691  CanNegate = false;
1692  MustBeFirst = MustBeFirstL || MustBeFirstR;
1693  }
1694  return true;
1695  }
1696  return false;
1697 }
1698 
1699 /// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1700 /// of CCMP/FCCMP ops. See @ref AArch64CCMP.
1701 /// Tries to transform the given i1 producing node @p Val to a series of compare
1702 /// and conditional compare operations. @returns an NZCV flags producing node
1703 /// and sets @p OutCC to the flags that should be tested, or returns SDValue() if
1704 /// the transformation was not possible.
1705 /// \p Negate is true if we want this sub-tree being negated just by changing
1706 /// SETCC conditions.
1708  AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
1710  // We're at a tree leaf, produce a conditional comparison operation.
1711  unsigned Opcode = Val->getOpcode();
1712  if (Opcode == ISD::SETCC) {
1713  SDValue LHS = Val->getOperand(0);
1714  SDValue RHS = Val->getOperand(1);
1715  ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
1716  bool isInteger = LHS.getValueType().isInteger();
1717  if (Negate)
1718  CC = getSetCCInverse(CC, isInteger);
1719  SDLoc DL(Val);
1720  // Determine OutCC and handle FP special case.
1721  if (isInteger) {
1722  OutCC = changeIntCCToAArch64CC(CC);
1723  } else {
1725  AArch64CC::CondCode ExtraCC;
1726  changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
1727  // Some floating point conditions can't be tested with a single condition
1728  // code. Construct an additional comparison in this case.
1729  if (ExtraCC != AArch64CC::AL) {
1730  SDValue ExtraCmp;
1731  if (!CCOp.getNode())
1732  ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
1733  else
1734  ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
1735  ExtraCC, DL, DAG);
1736  CCOp = ExtraCmp;
1737  Predicate = ExtraCC;
1738  }
1739  }
1740 
1741  // Produce a normal comparison if we are first in the chain
1742  if (!CCOp)
1743  return emitComparison(LHS, RHS, CC, DL, DAG);
1744  // Otherwise produce a ccmp.
1745  return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
1746  DAG);
1747  }
1748  assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
1749 
1750  bool IsOR = Opcode == ISD::OR;
1751 
1752  SDValue LHS = Val->getOperand(0);
1753  bool CanNegateL;
1754  bool MustBeFirstL;
1755  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
1756  assert(ValidL && "Valid conjunction/disjunction tree");
1757  (void)ValidL;
1758 
1759  SDValue RHS = Val->getOperand(1);
1760  bool CanNegateR;
1761  bool MustBeFirstR;
1762  bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
1763  assert(ValidR && "Valid conjunction/disjunction tree");
1764  (void)ValidR;
1765 
1766  // Swap sub-tree that must come first to the right side.
1767  if (MustBeFirstL) {
1768  assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
1769  std::swap(LHS, RHS);
1770  std::swap(CanNegateL, CanNegateR);
1771  std::swap(MustBeFirstL, MustBeFirstR);
1772  }
1773 
1774  bool NegateR;
1775  bool NegateAfterR;
1776  bool NegateL;
1777  bool NegateAfterAll;
1778  if (Opcode == ISD::OR) {
1779  // Swap the sub-tree that we can negate naturally to the left.
1780  if (!CanNegateL) {
1781  assert(CanNegateR && "at least one side must be negatable");
1782  assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
1783  assert(!Negate);
1784  std::swap(LHS, RHS);
1785  NegateR = false;
1786  NegateAfterR = true;
1787  } else {
1788  // Negate the left sub-tree if possible, otherwise negate the result.
1789  NegateR = CanNegateR;
1790  NegateAfterR = !CanNegateR;
1791  }
1792  NegateL = true;
1793  NegateAfterAll = !Negate;
1794  } else {
1795  assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
1796  assert(!Negate && "Valid conjunction/disjunction tree");
1797 
1798  NegateL = false;
1799  NegateR = false;
1800  NegateAfterR = false;
1801  NegateAfterAll = false;
1802  }
1803 
1804  // Emit sub-trees.
1805  AArch64CC::CondCode RHSCC;
1806  SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
1807  if (NegateAfterR)
1808  RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
1809  SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
1810  if (NegateAfterAll)
1811  OutCC = AArch64CC::getInvertedCondCode(OutCC);
1812  return CmpL;
1813 }
1814 
1815 /// Emit expression as a conjunction (a series of CCMP/FCCMP ops).
1816 /// In some cases this is even possible with OR operations in the expression.
1817 /// See \ref AArch64CCMP.
1818 /// \see emitConjunctionRec().
1820  AArch64CC::CondCode &OutCC) {
1821  bool DummyCanNegate;
1822  bool DummyMustBeFirst;
1823  if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
1824  return SDValue();
1825 
1826  return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
1827 }
1828 
1829 /// @}
1830 
1831 /// Returns how profitable it is to fold a comparison's operand's shift and/or
1832 /// extension operations.
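/// A return value of 2 marks the most profitable case (a supported extend
/// combined with a shift of at most 4), 1 a plain supported extend or shift,
/// and 0 that nothing here can profitably be folded into the compare.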
1834  auto isSupportedExtend = [&](SDValue V) {
1835  if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
1836  return true;
1837 
1838  if (V.getOpcode() == ISD::AND)
1839  if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
1840  uint64_t Mask = MaskCst->getZExtValue();
1841  return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
1842  }
1843 
1844  return false;
1845  };
1846 
1847  if (!Op.hasOneUse())
1848  return 0;
1849 
1850  if (isSupportedExtend(Op))
1851  return 1;
1852 
1853  unsigned Opc = Op.getOpcode();
1854  if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
1855  if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1856  uint64_t Shift = ShiftCst->getZExtValue();
1857  if (isSupportedExtend(Op.getOperand(0)))
1858  return (Shift <= 4) ? 2 : 1;
1859  EVT VT = Op.getValueType();
1860  if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
1861  return 1;
1862  }
1863 
1864  return 0;
1865 }
1866 
1868  SDValue &AArch64cc, SelectionDAG &DAG,
1869  const SDLoc &dl) {
1870  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
1871  EVT VT = RHS.getValueType();
1872  uint64_t C = RHSC->getZExtValue();
1873  if (!isLegalArithImmed(C)) {
1874  // Constant does not fit, try adjusting it by one?
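  // For example (illustration only): "x < 0x1001" cannot encode 0x1001, but
  // it is equivalent to "x <= 0x1000", and 0x1000 is a legal immediate.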
1875  switch (CC) {
1876  default:
1877  break;
1878  case ISD::SETLT:
1879  case ISD::SETGE:
1880  if ((VT == MVT::i32 && C != 0x80000000 &&
1881  isLegalArithImmed((uint32_t)(C - 1))) ||
1882  (VT == MVT::i64 && C != 0x80000000ULL &&
1883  isLegalArithImmed(C - 1ULL))) {
1884  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
1885  C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1886  RHS = DAG.getConstant(C, dl, VT);
1887  }
1888  break;
1889  case ISD::SETULT:
1890  case ISD::SETUGE:
1891  if ((VT == MVT::i32 && C != 0 &&
1892  isLegalArithImmed((uint32_t)(C - 1))) ||
1893  (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
1894  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
1895  C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1896  RHS = DAG.getConstant(C, dl, VT);
1897  }
1898  break;
1899  case ISD::SETLE:
1900  case ISD::SETGT:
1901  if ((VT == MVT::i32 && C != INT32_MAX &&
1902  isLegalArithImmed((uint32_t)(C + 1))) ||
1903  (VT == MVT::i64 && C != INT64_MAX &&
1904  isLegalArithImmed(C + 1ULL))) {
1905  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
1906  C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1907  RHS = DAG.getConstant(C, dl, VT);
1908  }
1909  break;
1910  case ISD::SETULE:
1911  case ISD::SETUGT:
1912  if ((VT == MVT::i32 && C != UINT32_MAX &&
1913  isLegalArithImmed((uint32_t)(C + 1))) ||
1914  (VT == MVT::i64 && C != UINT64_MAX &&
1915  isLegalArithImmed(C + 1ULL))) {
1916  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
1917  C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1918  RHS = DAG.getConstant(C, dl, VT);
1919  }
1920  break;
1921  }
1922  }
1923  }
1924 
1925  // Comparisons are canonicalized so that the RHS operand is simpler than the
1926  // LHS one, the extreme case being when RHS is an immediate. However, AArch64
1927  // can fold some shift+extend operations on the RHS operand, so swap the
1928  // operands if that can be done.
1929  //
1930  // For example:
1931  // lsl w13, w11, #1
1932  // cmp w13, w12
1933  // can be turned into:
1934  // cmp w12, w11, lsl #1
1935  if (!isa<ConstantSDNode>(RHS) ||
1936  !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
1937  SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
1938 
1940  std::swap(LHS, RHS);
1942  }
1943  }
1944 
1945  SDValue Cmp;
1946  AArch64CC::CondCode AArch64CC;
1947  if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
1948  const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
1949 
1950  // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
1951  // For the i8 operand, the largest immediate is 255, so this can be easily
1952  // encoded in the compare instruction. For the i16 operand, however, the
1953  // largest immediate cannot be encoded in the compare.
1954  // Therefore, use a sign extending load and cmn to avoid materializing the
1955  // -1 constant. For example,
1956  // movz w1, #65535
1957  // ldrh w0, [x0, #0]
1958  // cmp w0, w1
1959  // >
1960  // ldrsh w0, [x0, #0]
1961  // cmn w0, #1
1962  // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
1963  // if and only if (sext LHS) == (sext RHS). The checks are in place to
1964  // ensure both the LHS and RHS are truly zero extended and to make sure the
1965  // transformation is profitable.
1966  if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
1967  cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
1968  cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
1969  LHS.getNode()->hasNUsesOfValue(1, 0)) {
1970  int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
1971  if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
1972  SDValue SExt =
1973  DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
1974  DAG.getValueType(MVT::i16));
1975  Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
1976  RHS.getValueType()),
1977  CC, dl, DAG);
1978  AArch64CC = changeIntCCToAArch64CC(CC);
1979  }
1980  }
1981 
1982  if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
1983  if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
1984  if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
1985  AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
1986  }
1987  }
1988  }
1989 
1990  if (!Cmp) {
1991  Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
1992  AArch64CC = changeIntCCToAArch64CC(CC);
1993  }
1994  AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
1995  return Cmp;
1996 }
1997 
1998 static std::pair<SDValue, SDValue>
2000  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
2001  "Unsupported value type");
2002  SDValue Value, Overflow;
2003  SDLoc DL(Op);
2004  SDValue LHS = Op.getOperand(0);
2005  SDValue RHS = Op.getOperand(1);
2006  unsigned Opc = 0;
2007  switch (Op.getOpcode()) {
2008  default:
2009  llvm_unreachable("Unknown overflow instruction!");
2010  case ISD::SADDO:
2011  Opc = AArch64ISD::ADDS;
2012  CC = AArch64CC::VS;
2013  break;
2014  case ISD::UADDO:
2015  Opc = AArch64ISD::ADDS;
2016  CC = AArch64CC::HS;
2017  break;
2018  case ISD::SSUBO:
2019  Opc = AArch64ISD::SUBS;
2020  CC = AArch64CC::VS;
2021  break;
2022  case ISD::USUBO:
2023  Opc = AArch64ISD::SUBS;
2024  CC = AArch64CC::LO;
2025  break;
2026  // Multiply needs a little extra work.
2027  case ISD::SMULO:
2028  case ISD::UMULO: {
2029  CC = AArch64CC::NE;
2030  bool IsSigned = Op.getOpcode() == ISD::SMULO;
2031  if (Op.getValueType() == MVT::i32) {
2032  unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2033  // For a 32 bit multiply with overflow check we want the instruction
2034  // selector to generate a widening multiply (SMADDL/UMADDL). For that we
2035  // need to generate the following pattern:
2036  // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
2037  LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
2038  RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
2039  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2040  SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
2041  DAG.getConstant(0, DL, MVT::i64));
2042  // On AArch64 the upper 32 bits are always zero extended for a 32 bit
2043  // operation. We need to clear out the upper 32 bits, because we used a
2044  // widening multiply that wrote all 64 bits. In the end this should be a
2045  // noop.
2046  Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
2047  if (IsSigned) {
2048  // The signed overflow check requires more than just a simple check for
2049  // any bit set in the upper 32 bits of the result. These bits could be
2050  // just the sign bits of a negative number. To perform the overflow
2051  // check we arithmetically shift the low 32 bits of the result right by
2052  // 31 (replicating their sign bit) and compare that to the upper 32 bits.
2053  SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
2054  DAG.getConstant(32, DL, MVT::i64));
2055  UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
2056  SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
2057  DAG.getConstant(31, DL, MVT::i64));
2058  // It is important that LowerBits is last, otherwise the arithmetic
2059  // shift will not be folded into the compare (SUBS).
2060  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
2061  Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2062  .getValue(1);
2063  } else {
2064  // The overflow check for unsigned multiply is easy. We only need to
2065  // check if any of the upper 32 bits are set. This can be done with a
2066  // CMP (shifted register). For that we need to generate the following
2067  // pattern:
2068  // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
2069  SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2070  DAG.getConstant(32, DL, MVT::i64));
2071  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2072  Overflow =
2073  DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2074  DAG.getConstant(0, DL, MVT::i64),
2075  UpperBits).getValue(1);
2076  }
2077  break;
2078  }
2079  assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
2080  // For the 64-bit multiply, check for overflow via the high half of the product.
2081  Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2082  if (IsSigned) {
2083  SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
2084  SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
2085  DAG.getConstant(63, DL, MVT::i64));
2086  // It is important that LowerBits is last, otherwise the arithmetic
2087  // shift will not be folded into the compare (SUBS).
2088  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2089  Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2090  .getValue(1);
2091  } else {
2092  SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
2093  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2094  Overflow =
2095  DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2096  DAG.getConstant(0, DL, MVT::i64),
2097  UpperBits).getValue(1);
2098  }
2099  break;
2100  }
2101  } // switch (...)
2102 
2103  if (Opc) {
2104  SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
2105 
2106  // Emit the AArch64 operation with overflow check.
2107  Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
2108  Overflow = Value.getValue(1);
2109  }
2110  return std::make_pair(Value, Overflow);
2111 }
2112 
2113 SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
2114  RTLIB::Libcall Call) const {
2115  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2116  return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
2117 }
2118 
2119 // Returns true if the given Op is the overflow flag result of an overflow
2120 // intrinsic operation.
2121 static bool isOverflowIntrOpRes(SDValue Op) {
2122  unsigned Opc = Op.getOpcode();
2123  return (Op.getResNo() == 1 &&
2124  (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
2125  Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
2126 }
2127 
2128 static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
2129  SDValue Sel = Op.getOperand(0);
2130  SDValue Other = Op.getOperand(1);
2131  SDLoc dl(Sel);
2132 
2133  // If the operand is an overflow checking operation, invert the condition
2134  // code and kill the Not operation. I.e., transform:
2135  // (xor (overflow_op_bool, 1))
2136  // -->
2137  // (csel 1, 0, invert(cc), overflow_op_bool)
2138  // ... which later gets transformed to just a cset instruction with an
2139  // inverted condition code, rather than a cset + eor sequence.
2140  if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
2141  // Only lower legal XALUO ops.
2142  if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
2143  return SDValue();
2144 
2145  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2146  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2147  AArch64CC::CondCode CC;
2148  SDValue Value, Overflow;
2149  std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
2150  SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2151  return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
2152  CCVal, Overflow);
2153  }
2154  // If neither operand is a SELECT_CC, give up.
2155  if (Sel.getOpcode() != ISD::SELECT_CC)
2156  std::swap(Sel, Other);
2157  if (Sel.getOpcode() != ISD::SELECT_CC)
2158  return Op;
2159 
2160  // The folding we want to perform is:
2161  // (xor x, (select_cc a, b, cc, 0, -1) )
2162  // -->
2163  // (csel x, (xor x, -1), cc ...)
2164  //
2165  // The latter will get matched to a CSINV instruction.
2166 
2167  ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
2168  SDValue LHS = Sel.getOperand(0);
2169  SDValue RHS = Sel.getOperand(1);
2170  SDValue TVal = Sel.getOperand(2);
2171  SDValue FVal = Sel.getOperand(3);
2172 
2173  // FIXME: This could be generalized to non-integer comparisons.
2174  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
2175  return Op;
2176 
2177  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
2178  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
2179 
2180  // The values aren't constants, this isn't the pattern we're looking for.
2181  if (!CFVal || !CTVal)
2182  return Op;
2183 
2184  // We can commute the SELECT_CC by inverting the condition. This
2185  // might be needed to make this fit into a CSINV pattern.
2186  if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
2187  std::swap(TVal, FVal);
2188  std::swap(CTVal, CFVal);
2189  CC = ISD::getSetCCInverse(CC, true);
2190  }
2191 
2192  // If the constants line up, perform the transform!
2193  if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
2194  SDValue CCVal;
2195  SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
2196 
2197  FVal = Other;
2198  TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
2199  DAG.getConstant(-1ULL, dl, Other.getValueType()));
2200 
2201  return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
2202  CCVal, Cmp);
2203  }
2204 
2205  return Op;
2206 }
2207 
2208 static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
2209  EVT VT = Op.getValueType();
2210 
2211  // Let legalize expand this if it isn't a legal type yet.
2212  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
2213  return SDValue();
2214 
2215  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
2216 
2217  unsigned Opc;
2218  bool ExtraOp = false;
2219  switch (Op.getOpcode()) {
2220  default:
2221  llvm_unreachable("Invalid code");
2222  case ISD::ADDC:
2223  Opc = AArch64ISD::ADDS;
2224  break;
2225  case ISD::SUBC:
2226  Opc = AArch64ISD::SUBS;
2227  break;
2228  case ISD::ADDE:
2229  Opc = AArch64ISD::ADCS;
2230  ExtraOp = true;
2231  break;
2232  case ISD::SUBE:
2233  Opc = AArch64ISD::SBCS;
2234  ExtraOp = true;
2235  break;
2236  }
2237 
2238  if (!ExtraOp)
2239  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
2240  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
2241  Op.getOperand(2));
2242 }
2243 
2244 static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
2245  // Let legalize expand this if it isn't a legal type yet.
2246  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
2247  return SDValue();
2248 
2249  SDLoc dl(Op);
2250  AArch64CC::CondCode CC;
2251  // The actual operation that sets the overflow or carry flag.
2252  SDValue Value, Overflow;
2253  std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
2254 
2255  // We use 0 and 1 as false and true values.
2256  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2257  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2258 
2259  // We use an inverted condition, because the conditional select is inverted
2260  // too. This will allow it to be selected to a single instruction:
2261  // CSINC Wd, WZR, WZR, invert(cond).
2262  SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2263  Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
2264  CCVal, Overflow);
2265 
2266  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
2267  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
2268 }
2269 
2270 // Prefetch operands are:
2271 // 1: Address to prefetch
2272 // 2: bool isWrite
2273 // 3: int locality (0 = no locality ... 3 = extreme locality)
2274 // 4: bool isDataCache
2275 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
2276  SDLoc DL(Op);
2277  unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2278  unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
2279  unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2280 
2281  bool IsStream = !Locality;
2282  // When a non-zero locality is specified
2283  if (Locality) {
2284  // The front-end should have filtered out the out-of-range values
2285  assert(Locality <= 3 && "Prefetch locality out-of-range");
2286  // The locality degree is the inverse of the targeted cache level:
2287  // locality 3 means "keep in the fastest cache", i.e. L1, so flip the
2288  // number. The encoding starts at 0 for level 1.
2289  Locality = 3 - Locality;
2290  }
2291 
2292  // Build the mask value encoding the expected behavior.
2293  unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
2294  (!IsData << 3) | // IsDataCache bit
2295  (Locality << 1) | // Cache level bits
2296  (unsigned)IsStream; // Stream bit
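 // E.g. a data read prefetch with locality 3 encodes as 0b00000 (PLDL1KEEP),
 // while locality 0 encodes as 0b00001 (PLDL1STRM).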
2297  return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
2298  DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
2299 }
2300 
2301 SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
2302  SelectionDAG &DAG) const {
2303  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
2304 
2305  RTLIB::Libcall LC;
2306  LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), MVT::f128);
2307 
2308  return LowerF128Call(Op, DAG, LC);
2309 }
2310 
2311 SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
2312  SelectionDAG &DAG) const {
2313  if (Op.getOperand(0).getValueType() != MVT::f128) {
2314  // It's legal except when f128 is involved
2315  return Op;
2316  }
2317 
2318  RTLIB::Libcall LC;
2319  LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
2320 
2321  // FP_ROUND node has a second operand indicating whether it is known to be
2322  // precise. That doesn't take part in the LibCall so we can't directly use
2323  // LowerF128Call.
2324  SDValue SrcVal = Op.getOperand(0);
2325  return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
2326  SDLoc(Op)).first;
2327 }
2328 
2329 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
2330  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2331  // Any additional optimization in this function should be recorded
2332  // in the cost tables.
2333  EVT InVT = Op.getOperand(0).getValueType();
2334  EVT VT = Op.getValueType();
2335  unsigned NumElts = InVT.getVectorNumElements();
2336 
2337  // f16 vectors are promoted to f32 before a conversion.
2338  if (InVT.getVectorElementType() == MVT::f16) {
2339  MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
2340  SDLoc dl(Op);
2341  return DAG.getNode(
2342  Op.getOpcode(), dl, Op.getValueType(),
2343  DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
2344  }
2345 
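 // E.g. a v2f64 -> v2i32 conversion is performed as a v2f64 -> v2i64
 // conversion followed by a vector truncate.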
2346  if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2347  SDLoc dl(Op);
2348  SDValue Cv =
2349  DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
2350  Op.getOperand(0));
2351  return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
2352  }
2353 
2354  if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2355  SDLoc dl(Op);
2356  MVT ExtVT =
2357  MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
2358  VT.getVectorNumElements());
2359  SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
2360  return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
2361  }
2362 
2363  // Type changing conversions are illegal.
2364  return Op;
2365 }
2366 
2367 SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
2368  SelectionDAG &DAG) const {
2369  if (Op.getOperand(0).getValueType().isVector())
2370  return LowerVectorFP_TO_INT(Op, DAG);
2371 
2372  // f16 conversions are promoted to f32 when full fp16 is not supported.
2373  if (Op.getOperand(0).getValueType() == MVT::f16 &&
2374  !Subtarget->hasFullFP16()) {
2375  SDLoc dl(Op);
2376  return DAG.getNode(
2377  Op.getOpcode(), dl, Op.getValueType(),
2378  DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
2379  }
2380 
2381  if (Op.getOperand(0).getValueType() != MVT::f128) {
2382  // It's legal except when f128 is involved
2383  return Op;
2384  }
2385 
2386  RTLIB::Libcall LC;
2387  if (Op.getOpcode() == ISD::FP_TO_SINT)
2388  LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
2389  else
2390  LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
2391 
2392  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2393  return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first;
2394 }
2395 
2396 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2397  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2398  // Any additional optimization in this function should be recorded
2399  // in the cost tables.
2400  EVT VT = Op.getValueType();
2401  SDLoc dl(Op);
2402  SDValue In = Op.getOperand(0);
2403  EVT InVT = In.getValueType();
2404 
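 // E.g. shrinking v2i64 -> v2f32 is done by converting to v2f64 and rounding,
 // while widening v4i16 -> v4f32 first extends the integer elements to v4i32.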
2405  if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2406  MVT CastVT =
2407  MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
2408  InVT.getVectorNumElements());
2409  In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
2410  return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
2411  }
2412 
2413  if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2414  unsigned CastOpc =
2415  Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2416  EVT CastVT = VT.changeVectorElementTypeToInteger();
2417  In = DAG.getNode(CastOpc, dl, CastVT, In);
2418  return DAG.getNode(Op.getOpcode(), dl, VT, In);
2419  }
2420 
2421  return Op;
2422 }
2423 
2424 SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
2425  SelectionDAG &DAG) const {
2426  if (Op.getValueType().isVector())
2427  return LowerVectorINT_TO_FP(Op, DAG);
2428 
2429  // f16 conversions are promoted to f32 when full fp16 is not supported.
2430  if (Op.getValueType() == MVT::f16 &&
2431  !Subtarget->hasFullFP16()) {
2432  SDLoc dl(Op);
2433  return DAG.getNode(
2434  ISD::FP_ROUND, dl, MVT::f16,
2435  DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
2436  DAG.getIntPtrConstant(0, dl));
2437  }
2438 
2439  // i128 conversions are libcalls.
2440  if (Op.getOperand(0).getValueType() == MVT::i128)
2441  return SDValue();
2442 
2443  // Other conversions are legal, unless it's to the completely software-based
2444  // fp128.
2445  if (Op.getValueType() != MVT::f128)
2446  return Op;
2447 
2448  RTLIB::Libcall LC;
2449  if (Op.getOpcode() == ISD::SINT_TO_FP)
2450  LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2451  else
2452  LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2453 
2454  return LowerF128Call(Op, DAG, LC);
2455 }
2456 
2457 SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
2458  SelectionDAG &DAG) const {
2459  // For iOS, we want to call an alternative entry point: __sincos_stret,
2460  // which returns the values in two S / D registers.
2461  SDLoc dl(Op);
2462  SDValue Arg = Op.getOperand(0);
2463  EVT ArgVT = Arg.getValueType();
2464  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
2465 
2466  ArgListTy Args;
2467  ArgListEntry Entry;
2468 
2469  Entry.Node = Arg;
2470  Entry.Ty = ArgTy;
2471  Entry.IsSExt = false;
2472  Entry.IsZExt = false;
2473  Args.push_back(Entry);
2474 
2475  RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
2476  : RTLIB::SINCOS_STRET_F32;
2477  const char *LibcallName = getLibcallName(LC);
2478  SDValue Callee =
2479  DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
2480 
2481  StructType *RetTy = StructType::get(ArgTy, ArgTy);
2482  TargetLowering::CallLoweringInfo CLI(DAG);
2483  CLI.setDebugLoc(dl)
2484  .setChain(DAG.getEntryNode())
2485  .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
2486 
2487  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2488  return CallResult.first;
2489 }
2490 
2491 static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
2492  if (Op.getValueType() != MVT::f16)
2493  return SDValue();
2494 
2495  assert(Op.getOperand(0).getValueType() == MVT::i16);
2496  SDLoc DL(Op);
2497 
2498  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
2499  Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
2500  return SDValue(
2501  DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
2502  DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
2503  0);
2504 }
2505 
2506 static EVT getExtensionTo64Bits(const EVT &OrigVT) {
2507  if (OrigVT.getSizeInBits() >= 64)
2508  return OrigVT;
2509 
2510  assert(OrigVT.isSimple() && "Expecting a simple value type");
2511 
2512  MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
2513  switch (OrigSimpleTy) {
2514  default: llvm_unreachable("Unexpected Vector Type");
2515  case MVT::v2i8:
2516  case MVT::v2i16:
2517  return MVT::v2i32;
2518  case MVT::v4i8:
2519  return MVT::v4i16;
2520  }
2521 }
2522 
2523 static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
2524  const EVT &OrigTy,
2525  const EVT &ExtTy,
2526  unsigned ExtOpcode) {
2527  // The vector originally had a size of OrigTy. It was then extended to ExtTy.
2528  // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
2529  // 64-bits we need to insert a new extension so that it will be 64-bits.
2530  assert(ExtTy.is128BitVector() && "Unexpected extension size");
2531  if (OrigTy.getSizeInBits() >= 64)
2532  return N;
2533 
2534  // Must extend size to at least 64 bits to be used as an operand for VMULL.
2535  EVT NewVT = getExtensionTo64Bits(OrigTy);
2536 
2537  return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
2538 }
2539 
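// Returns true if N is a BUILD_VECTOR whose constant elements all fit in half
// the element width, so the vector can act as the (implicitly extended)
// operand of a widening multiply.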
2540 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
2541  bool isSigned) {
2542  EVT VT = N->getValueType(0);
2543 
2544  if (N->getOpcode() != ISD::BUILD_VECTOR)
2545  return false;
2546 
2547  for (const SDValue &Elt : N->op_values()) {
2548  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2549  unsigned EltSize = VT.getScalarSizeInBits();
2550  unsigned HalfSize = EltSize / 2;
2551  if (isSigned) {
2552  if (!isIntN(HalfSize, C->getSExtValue()))
2553  return false;
2554  } else {
2555  if (!isUIntN(HalfSize, C->getZExtValue()))
2556  return false;
2557  }
2558  continue;
2559  }
2560  return false;
2561  }
2562 
2563  return true;
2564 }
2565 
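// Strips a sign/zero extension from N (re-extending to 64 bits if the source
// was narrower), or narrows a BUILD_VECTOR of small constants, yielding a
// 64-bit vector suitable as an SMULL/UMULL operand.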
2566 static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
2567  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
2568  return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
2569  N->getOperand(0)->getValueType(0),
2570  N->getValueType(0),
2571  N->getOpcode());
2572 
2573  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
2574  EVT VT = N->getValueType(0);
2575  SDLoc dl(N);
2576  unsigned EltSize = VT.getScalarSizeInBits() / 2;
2577  unsigned NumElts = VT.getVectorNumElements();
2578  MVT TruncVT = MVT::getIntegerVT(EltSize);
2579  SmallVector<SDValue, 8> Ops;
2580  for (unsigned i = 0; i != NumElts; ++i) {
2581  ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
2582  const APInt &CInt = C->getAPIntValue();
2583  // Element types smaller than 32 bits are not legal, so use i32 elements.
2584  // The values are implicitly truncated so sext vs. zext doesn't matter.
2585  Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
2586  }
2587  return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
2588 }
2589 
2590 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
2591  return N->getOpcode() == ISD::SIGN_EXTEND ||
2592  isExtendedBUILD_VECTOR(N, DAG, true);
2593 }
2594 
2595 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
2596  return N->getOpcode() == ISD::ZERO_EXTEND ||
2597  isExtendedBUILD_VECTOR(N, DAG, false);
2598 }
2599 
2600 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
2601  unsigned Opcode = N->getOpcode();
2602  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2603  SDNode *N0 = N->getOperand(0).getNode();
2604  SDNode *N1 = N->getOperand(1).getNode();
2605  return N0->hasOneUse() && N1->hasOneUse() &&
2606  isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
2607  }
2608  return false;
2609 }
2610 
2611 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
2612  unsigned Opcode = N->getOpcode();
2613  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2614  SDNode *N0 = N->getOperand(0).getNode();
2615  SDNode *N1 = N->getOperand(1).getNode();
2616  return N0->hasOneUse() && N1->hasOneUse() &&
2617  isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
2618  }
2619  return false;
2620 }
2621 
2622 SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2623  SelectionDAG &DAG) const {
2624  // The rounding mode is in bits 23:22 of the FPCR.
2625  // The AArch64 rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
2626  // The formula we use to implement this is ((FPCR + (1 << 22)) >> 22) & 3,
2627  // so that the shift + and get folded into a bitfield extract.
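 // E.g. FPCR.RMode == 0b00 (round to nearest) yields a FLT_ROUNDS value of 1.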
2628  SDLoc dl(Op);
2629 
2630  SDValue FPCR_64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i64,
2631  DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl,
2632  MVT::i64));
2633  SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
2634  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
2635  DAG.getConstant(1U << 22, dl, MVT::i32));
2636  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
2637  DAG.getConstant(22, dl, MVT::i32));
2638  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
2639  DAG.getConstant(3, dl, MVT::i32));
2640 }
2641 
2642 SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
2643  // Multiplications are only custom-lowered for 128-bit vectors so that
2644  // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
2645  EVT VT = Op.getValueType();
2646  assert(VT.is128BitVector() && VT.isInteger() &&
2647  "unexpected type for custom-lowering ISD::MUL");
2648  SDNode *N0 = Op.getOperand(0).getNode();
2649  SDNode *N1 = Op.getOperand(1).getNode();
2650  unsigned NewOpc = 0;
2651  bool isMLA = false;
2652  bool isN0SExt = isSignExtended(N0, DAG);
2653  bool isN1SExt = isSignExtended(N1, DAG);
2654  if (isN0SExt && isN1SExt)
2655  NewOpc = AArch64ISD::SMULL;
2656  else {
2657  bool isN0ZExt = isZeroExtended(N0, DAG);
2658  bool isN1ZExt = isZeroExtended(N1, DAG);
2659  if (isN0ZExt && isN1ZExt)
2660  NewOpc = AArch64ISD::UMULL;
2661  else if (isN1SExt || isN1ZExt) {
2662  // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
2663  // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
2664  if (isN1SExt && isAddSubSExt(N0, DAG)) {
2665  NewOpc = AArch64ISD::SMULL;
2666  isMLA = true;
2667  } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
2668  NewOpc = AArch64ISD::UMULL;
2669  isMLA = true;
2670  } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
2671  std::swap(N0, N1);
2672  NewOpc = AArch64ISD::UMULL;
2673  isMLA = true;
2674  }
2675  }
2676 
2677  if (!NewOpc) {
2678  if (VT == MVT::v2i64)
2679  // Fall through to expand this. It is not legal.
2680  return SDValue();
2681  else
2682  // Other vector multiplications are legal.
2683  return Op;
2684  }
2685  }
2686 
2687  // Legalize to a S/UMULL instruction
2688  SDLoc DL(Op);
2689  SDValue Op0;
2690  SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
2691  if (!isMLA) {
2692  Op0 = skipExtensionForVectorMULL(N0, DAG);
2693  assert(Op0.getValueType().is64BitVector() &&
2694  Op1.getValueType().is64BitVector() &&
2695  "unexpected types for extended operands to VMULL");
2696  return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
2697  }
2698  // Optimize (zext A + zext B) * C into (S/UMULL A, C) + (S/UMULL B, C) during
2699  // isel lowering to take advantage of back-to-back s/umul + s/umla with no
2700  // stall. This holds for CPUs with accumulate forwarding such as Cortex-A53/A57.
2701  SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
2702  SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
2703  EVT Op1VT = Op1.getValueType();
2704  return DAG.getNode(N0->getOpcode(), DL, VT,
2705  DAG.getNode(NewOpc, DL, VT,
2706  DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
2707  DAG.getNode(NewOpc, DL, VT,
2708  DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
2709 }
2710 
2711 SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2712  SelectionDAG &DAG) const {
2713  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2714  SDLoc dl(Op);
2715  switch (IntNo) {
2716  default: return SDValue(); // Don't custom lower most intrinsics.
2717  case Intrinsic::thread_pointer: {
2718  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2719  return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
2720  }
2721  case Intrinsic::aarch64_neon_abs: {
2722  EVT Ty = Op.getValueType();
2723  if (Ty == MVT::i64) {
2724  SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
2725  Op.getOperand(1));
2726  Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
2727  return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
2728  } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
2729  return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
2730  } else {
2731  report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
2732  }
2733  }
2734  case Intrinsic::aarch64_neon_smax:
2735  return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
2736  Op.getOperand(1), Op.getOperand(2));
2737  case Intrinsic::aarch64_neon_umax:
2738  return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
2739  Op.getOperand(1), Op.getOperand(2));
2740  case Intrinsic::aarch64_neon_smin:
2741  return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
2742  Op.getOperand(1), Op.getOperand(2));
2743  case Intrinsic::aarch64_neon_umin:
2744  return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
2745  Op.getOperand(1), Op.getOperand(2));
2746 
2747  case Intrinsic::localaddress: {
2748  // Returns one of the stack, base, or frame pointer registers, depending on
2749  // which is used to reference local variables.
2750  MachineFunction &MF = DAG.getMachineFunction();
2751  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2752  unsigned Reg;
2753  if (RegInfo->hasBasePointer(MF))
2754  Reg = RegInfo->getBaseRegister();
2755  else // This function handles the SP or FP case.
2756  Reg = RegInfo->getFrameRegister(MF);
2757  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
2758  Op.getSimpleValueType());
2759  }
2760 
2761  case Intrinsic::eh_recoverfp: {
2762  // FIXME: This needs to be implemented to correctly handle highly aligned
2763  // stack objects. For now we simply return the incoming FP. Refer D53541
2764  // for more details.
2765  SDValue FnOp = Op.getOperand(1);
2766  SDValue IncomingFPOp = Op.getOperand(2);
2767  GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
2768  auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
2769  if (!Fn)
2770  report_fatal_error(
2771  "llvm.eh.recoverfp must take a function as the first argument");
2772  return IncomingFPOp;
2773  }
2774  }
2775 }
2776 
2777 // Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
2778 static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
2779  EVT VT, EVT MemVT,
2780  SelectionDAG &DAG) {
2781  assert(VT.isVector() && "VT should be a vector type");
2782  assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
2783 
2784  SDValue Value = ST->getValue();
2785 
2786  // It first extends the promoted v4i16 to v8i16, truncates to v8i8, and
2787  // extracts the word lane which represents the v4i8 subvector. It optimizes
2788  // the store to:
2789  //
2790  // xtn v0.8b, v0.8h
2791  // str s0, [x0]
2792 
2793  SDValue Undef = DAG.getUNDEF(MVT::i16);
2794  SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
2795  {Undef, Undef, Undef, Undef});
2796 
2797  SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
2798  Value, UndefVec);
2799  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
2800 
2801  Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
2802  SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
2803  Trunc, DAG.getConstant(0, DL, MVT::i64));
2804 
2805  return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
2806  ST->getBasePtr(), ST->getMemOperand());
2807 }
2808 
2809 // Custom lowering for any store, vector or scalar, normal or truncating.
2810 // Currently only the truncating store operation from vector v4i16 to v4i8
2811 // is custom lowered.
2812 SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
2813  SelectionDAG &DAG) const {
2814  SDLoc Dl(Op);
2815  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
2816  assert (StoreNode && "Can only custom lower store nodes");
2817 
2818  SDValue Value = StoreNode->getValue();
2819 
2820  EVT VT = Value.getValueType();
2821  EVT MemVT = StoreNode->getMemoryVT();
2822 
2823  assert (VT.isVector() && "Can only custom lower vector store types");
2824 
2825  unsigned AS = StoreNode->getAddressSpace();
2826  unsigned Align = StoreNode->getAlignment();
2827  if (Align < MemVT.getStoreSize() &&
2828  !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
2829  return scalarizeVectorStore(StoreNode, DAG);
2830  }
2831 
2832  if (StoreNode->isTruncatingStore()) {
2833  return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
2834  }
2835 
2836  return SDValue();
2837 }
2838 
2839 SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
2840  SelectionDAG &DAG) const {
2841  LLVM_DEBUG(dbgs() << "Custom lowering: ");
2842  LLVM_DEBUG(Op.dump());
2843 
2844  switch (Op.getOpcode()) {
2845  default:
2846  llvm_unreachable("unimplemented operand");
2847  return SDValue();
2848  case ISD::BITCAST:
2849  return LowerBITCAST(Op, DAG);
2850  case ISD::GlobalAddress:
2851  return LowerGlobalAddress(Op, DAG);
2852  case ISD::GlobalTLSAddress:
2853  return LowerGlobalTLSAddress(Op, DAG);
2854  case ISD::SETCC:
2855  return LowerSETCC(Op, DAG);
2856  case ISD::BR_CC:
2857  return LowerBR_CC(Op, DAG);
2858  case ISD::SELECT:
2859  return LowerSELECT(Op, DAG);
2860  case ISD::SELECT_CC:
2861  return LowerSELECT_CC(Op, DAG);
2862  case ISD::JumpTable:
2863  return LowerJumpTable(Op, DAG);
2864  case ISD::BR_JT:
2865  return LowerBR_JT(Op, DAG);
2866  case ISD::ConstantPool:
2867  return LowerConstantPool(Op, DAG);
2868  case ISD::BlockAddress:
2869  return LowerBlockAddress(Op, DAG);
2870  case ISD::VASTART:
2871  return LowerVASTART(Op, DAG);
2872  case ISD::VACOPY:
2873  return LowerVACOPY(Op, DAG);
2874  case ISD::VAARG:
2875  return LowerVAARG(Op, DAG);
2876  case ISD::ADDC:
2877  case ISD::ADDE:
2878  case ISD::SUBC:
2879  case ISD::SUBE:
2880  return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
2881  case ISD::SADDO:
2882  case ISD::UADDO:
2883  case ISD::SSUBO:
2884  case ISD::USUBO:
2885  case ISD::SMULO:
2886  case ISD::UMULO:
2887  return LowerXALUO(Op, DAG);
2888  case ISD::FADD:
2889  return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
2890  case ISD::FSUB:
2891  return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
2892  case ISD::FMUL:
2893  return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
2894  case ISD::FDIV:
2895  return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
2896  case ISD::FP_ROUND:
2897  return LowerFP_ROUND(Op, DAG);
2898  case ISD::FP_EXTEND:
2899  return LowerFP_EXTEND(Op, DAG);
2900  case ISD::FRAMEADDR:
2901  return LowerFRAMEADDR(Op, DAG);
2902  case ISD::SPONENTRY:
2903  return LowerSPONENTRY(Op, DAG);
2904  case ISD::RETURNADDR:
2905  return LowerRETURNADDR(Op, DAG);
2906  case ISD::ADDROFRETURNADDR:
2907  return LowerADDROFRETURNADDR(Op, DAG);
2908  case ISD::INSERT_VECTOR_ELT:
2909  return LowerINSERT_VECTOR_ELT(Op, DAG);
2910  case ISD::EXTRACT_VECTOR_ELT:
2911  return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2912  case ISD::BUILD_VECTOR:
2913  return LowerBUILD_VECTOR(Op, DAG);
2914  case ISD::VECTOR_SHUFFLE:
2915  return LowerVECTOR_SHUFFLE(Op, DAG);
2916  case ISD::EXTRACT_SUBVECTOR:
2917  return LowerEXTRACT_SUBVECTOR(Op, DAG);
2918  case ISD::SRA:
2919  case ISD::SRL:
2920  case ISD::SHL:
2921  return LowerVectorSRA_SRL_SHL(Op, DAG);
2922  case ISD::SHL_PARTS:
2923  return LowerShiftLeftParts(Op, DAG);
2924  case ISD::SRL_PARTS:
2925  case ISD::SRA_PARTS:
2926  return LowerShiftRightParts(Op, DAG);
2927  case ISD::CTPOP:
2928  return LowerCTPOP(Op, DAG);
2929  case ISD::FCOPYSIGN:
2930  return LowerFCOPYSIGN(Op, DAG);
2931  case ISD::AND:
2932  return LowerVectorAND(Op, DAG);
2933  case ISD::OR:
2934  return LowerVectorOR(Op, DAG);
2935  case ISD::XOR:
2936  return LowerXOR(Op, DAG);
2937  case ISD::PREFETCH:
2938  return LowerPREFETCH(Op, DAG);
2939  case ISD::SINT_TO_FP:
2940  case ISD::UINT_TO_FP:
2941  return LowerINT_TO_FP(Op, DAG);
2942  case ISD::FP_TO_SINT:
2943  case ISD::FP_TO_UINT:
2944  return LowerFP_TO_INT(Op, DAG);
2945  case ISD::FSINCOS:
2946  return LowerFSINCOS(Op, DAG);
2947  case ISD::FLT_ROUNDS_:
2948  return LowerFLT_ROUNDS_(Op, DAG);
2949  case ISD::MUL:
2950  return LowerMUL(Op, DAG);
2951  case ISD::INTRINSIC_WO_CHAIN:
2952  return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2953  case ISD::STORE:
2954  return LowerSTORE(Op, DAG);
2955  case ISD::VECREDUCE_ADD:
2956  case ISD::VECREDUCE_SMAX:
2957  case ISD::VECREDUCE_SMIN:
2958  case ISD::VECREDUCE_UMAX:
2959  case ISD::VECREDUCE_UMIN:
2960  case ISD::VECREDUCE_FMAX:
2961  case ISD::VECREDUCE_FMIN:
2962  return LowerVECREDUCE(Op, DAG);
2963  case ISD::ATOMIC_LOAD_SUB:
2964  return LowerATOMIC_LOAD_SUB(Op, DAG);
2965  case ISD::ATOMIC_LOAD_AND:
2966  return LowerATOMIC_LOAD_AND(Op, DAG);
2967  case ISD::DYNAMIC_STACKALLOC:
2968  return LowerDYNAMIC_STACKALLOC(Op, DAG);
2969  }
2970 }
2971 
2972 //===----------------------------------------------------------------------===//
2973 // Calling Convention Implementation
2974 //===----------------------------------------------------------------------===//
2975 
2976 #include "AArch64GenCallingConv.inc"
2977 
2978 /// Selects the correct CCAssignFn for a given CallingConvention value.
2979 CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2980  bool IsVarArg) const {
2981  switch (CC) {
2982  default:
2983  report_fatal_error("Unsupported calling convention.");
2984  case CallingConv::WebKit_JS:
2985  return CC_AArch64_WebKit_JS;
2986  case CallingConv::GHC:
2987  return CC_AArch64_GHC;
2988  case CallingConv::C:
2989  case CallingConv::Fast:
2990  case CallingConv::PreserveMost:
2991  case CallingConv::CXX_FAST_TLS:
2992  case CallingConv::Swift:
2993  if (Subtarget->isTargetWindows() && IsVarArg)
2994  return CC_AArch64_Win64_VarArg;
2995  if (!Subtarget->isTargetDarwin())
2996  return CC_AArch64_AAPCS;
2997  return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
2998  case CallingConv::Win64:
2999  return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
3000  case CallingConv::AArch64_VectorCall:
3001  return CC_AArch64_AAPCS;
3002  }
3003 }
3004 
3005 CCAssignFn *
3006 AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
3007  return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3008  : RetCC_AArch64_AAPCS;
3009 }
3010 
3011 SDValue AArch64TargetLowering::LowerFormalArguments(
3012  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3013  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3014  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3015  MachineFunction &MF = DAG.getMachineFunction();
3016  MachineFrameInfo &MFI = MF.getFrameInfo();
3017  bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
3018 
3019  // Assign locations to all of the incoming arguments.
3020  SmallVector<CCValAssign, 16> ArgLocs;
3021  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3022  *DAG.getContext());
3023 
3024  // At this point, Ins[].VT may already be promoted to i32. To correctly
3025  // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3026  // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3027  // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
3028  // we use a special version of AnalyzeFormalArguments to pass in ValVT and
3029  // LocVT.
3030  unsigned NumArgs = Ins.size();
3031  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3032  unsigned CurArgIdx = 0;
3033  for (unsigned i = 0; i != NumArgs; ++i) {
3034  MVT ValVT = Ins[i].VT;
3035  if (Ins[i].isOrigArg()) {
3036  std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
3037  CurArgIdx = Ins[i].getOrigArgIndex();
3038 
3039  // Get type of the original argument.
3040  EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
3041  /*AllowUnknown*/ true);
3042  MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
3043  // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3044  if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3045  ValVT = MVT::i8;
3046  else if (ActualMVT == MVT::i16)
3047  ValVT = MVT::i16;
3048  }
3049  CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3050  bool Res =
3051  AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
3052  assert(!Res && "Call operand has unhandled type");
3053  (void)Res;
3054  }
3055  assert(ArgLocs.size() == Ins.size());
3056  SmallVector<SDValue, 16> ArgValues;
3057  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3058  CCValAssign &VA = ArgLocs[i];
3059 
3060  if (Ins[i].Flags.isByVal()) {
3061  // Byval is used for HFAs in the PCS, but the system should work in a
3062  // non-compliant manner for larger structs.
3063  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3064  int Size = Ins[i].Flags.getByValSize();
3065  unsigned NumRegs = (Size + 7) / 8;
3066 
3067  // FIXME: This works on big-endian for composite byvals, which are the common
3068  // case. It should also work for fundamental types too.
3069  unsigned FrameIdx =
3070  MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
3071  SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
3072  InVals.push_back(FrameIdxN);
3073 
3074  continue;
3075  }
3076 
3077  if (VA.isRegLoc()) {
3078  // Arguments stored in registers.
3079  EVT RegVT = VA.getLocVT();
3080 
3081  SDValue ArgValue;
3082  const TargetRegisterClass *RC;
3083 
3084  if (RegVT == MVT::i32)
3085  RC = &AArch64::GPR32RegClass;
3086  else if (RegVT == MVT::i64)
3087  RC = &AArch64::GPR64RegClass;
3088  else if (RegVT == MVT::f16)
3089  RC = &AArch64::FPR16RegClass;
3090  else if (RegVT == MVT::f32)
3091  RC = &AArch64::FPR32RegClass;
3092  else if (RegVT == MVT::f64 || RegVT.is64BitVector())
3093  RC = &AArch64::FPR64RegClass;
3094  else if (RegVT == MVT::f128 || RegVT.is128BitVector())
3095  RC = &AArch64::FPR128RegClass;
3096  else
3097  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3098 
3099  // Transform the arguments in physical registers into virtual ones.
3100  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3101  ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
3102 
3103  // If this is an 8, 16 or 32-bit value, it is really passed promoted
3104  // to 64 bits. Insert an assert[sz]ext to capture this, then
3105  // truncate to the right size.
3106  switch (VA.getLocInfo()) {
3107  default:
3108  llvm_unreachable("Unknown loc info!");
3109  case CCValAssign::Full:
3110  break;
3111  case CCValAssign::BCvt:
3112  ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
3113  break;
3114  case CCValAssign::AExt:
3115  case CCValAssign::SExt:
3116  case CCValAssign::ZExt:
3117  // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt
3118  // nodes after our lowering.
3119  assert(RegVT == Ins[i].VT && "incorrect register location selected");
3120  break;
3121  }
3122 
3123  InVals.push_back(ArgValue);
3124 
3125  } else { // VA.isRegLoc()
3126  assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
3127  unsigned ArgOffset = VA.getLocMemOffset();
3128  unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
3129 
3130  uint32_t BEAlign = 0;
3131  if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
3132  !Ins[i].Flags.isInConsecutiveRegs())
3133  BEAlign = 8 - ArgSize;
3134 
3135  int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
3136 
3137  // Create load nodes to retrieve arguments from the stack.
3138  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3139  SDValue ArgValue;
3140 
3141  // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
3142  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
3143  MVT MemVT = VA.getValVT();
3144 
3145  switch (VA.getLocInfo()) {
3146  default:
3147  break;
3148  case CCValAssign::BCvt:
3149  MemVT = VA.getLocVT();
3150  break;
3151  case CCValAssign::SExt:
3152  ExtType = ISD::SEXTLOAD;
3153  break;
3154  case CCValAssign::ZExt:
3155  ExtType = ISD::ZEXTLOAD;
3156  break;
3157  case CCValAssign::AExt:
3158  ExtType = ISD::EXTLOAD;
3159  break;
3160  }
3161 
3162  ArgValue = DAG.getExtLoad(
3163  ExtType, DL, VA.getLocVT(), Chain, FIN,
3164  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3165  MemVT);
3166 
3167  InVals.push_back(ArgValue);
3168  }
3169  }
3170 
3171  // varargs
3172  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3173  if (isVarArg) {
3174  if (!Subtarget->isTargetDarwin() || IsWin64) {
3175  // The AAPCS variadic function ABI is identical to the non-variadic
3176  // one. As a result there may be more arguments in registers and we should
3177  // save them for future reference.
3178  // Win64 variadic functions also pass arguments in registers, but all float
3179  // arguments are passed in integer registers.
3180  saveVarArgRegisters(CCInfo, DAG, DL, Chain);
3181  }
3182 
3183  // This will point to the next argument passed via stack.
3184  unsigned StackOffset = CCInfo.getNextStackOffset();
3185  // We currently pass all varargs at 8-byte alignment.
3186  StackOffset = ((StackOffset + 7) & ~7);
3187  FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
3188 
3189  if (MFI.hasMustTailInVarArgFunc()) {
3190  SmallVector<MVT, 2> RegParmTypes;
3191  RegParmTypes.push_back(MVT::i64);
3192  RegParmTypes.push_back(MVT::f128);
3193  // Compute the set of forwarded registers. The rest are scratch.
3194  SmallVectorImpl<ForwardedRegister> &Forwards =
3195  FuncInfo->getForwardedMustTailRegParms();
3196  CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
3197  CC_AArch64_AAPCS);
3198  }
3199  }
3200 
3201  unsigned StackArgSize = CCInfo.getNextStackOffset();
3202  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3203  if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
3204  // This is a non-standard ABI so by fiat I say we're allowed to make full
3205  // use of the stack area to be popped, which must be aligned to 16 bytes in
3206  // any case:
3207  StackArgSize = alignTo(StackArgSize, 16);
3208 
3209  // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
3210  // a multiple of 16.
3211  FuncInfo->setArgumentStackToRestore(StackArgSize);
3212 
3213  // This realignment carries over to the available bytes below. Our own
3214  // callers will guarantee the space is free by giving an aligned value to
3215  // CALLSEQ_START.
3216  }
3217  // Even if we're not expected to free up the space, it's useful to know how
3218  // much is there while considering tail calls (because we can reuse it).
3219  FuncInfo->setBytesInStackArgArea(StackArgSize);
3220 
3221  if (Subtarget->hasCustomCallingConv())
3222  Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
3223 
3224  return Chain;
3225 }
3226 
3227 void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
3228  SelectionDAG &DAG,
3229  const SDLoc &DL,
3230  SDValue &Chain) const {
3231  MachineFunction &MF = DAG.getMachineFunction();
3232  MachineFrameInfo &MFI = MF.getFrameInfo();
3233  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3234  auto PtrVT = getPointerTy(DAG.getDataLayout());
3235  bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
3236 
3237  SmallVector<SDValue, 8> MemOps;
3238 
3239  static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
3240  AArch64::X3, AArch64::X4, AArch64::X5,
3241  AArch64::X6, AArch64::X7 };
3242  static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
3243  unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
3244 
3245  unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
3246  int GPRIdx = 0;
3247  if (GPRSaveSize != 0) {
3248  if (IsWin64) {
3249  GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
3250  if (GPRSaveSize & 15)
3251  // The extra size here, if triggered, will always be 8.
3252  MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
3253  } else
3254  GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
3255 
3256  SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
3257 
3258  for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
3259  unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
3260  SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
3261  SDValue Store = DAG.getStore(
3262  Val.getValue(1), DL, Val, FIN,
3263  IsWin64
3264  ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
3265  GPRIdx,
3266  (i - FirstVariadicGPR) * 8)
3267  : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
3268  MemOps.push_back(Store);
3269  FIN =
3270  DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
3271  }
3272  }
3273  FuncInfo->setVarArgsGPRIndex(GPRIdx);
3274  FuncInfo->setVarArgsGPRSize(GPRSaveSize);
3275 
3276  if (Subtarget->hasFPARMv8() && !IsWin64) {
3277  static const MCPhysReg FPRArgRegs[] = {
3278  AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
3279  AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
3280  static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
3281  unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
3282 
3283  unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
3284  int FPRIdx = 0;
3285  if (FPRSaveSize != 0) {
3286  FPRIdx = MFI.CreateStackObject(FPRSaveSize, 16, false);
3287 
3288  SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
3289 
3290  for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
3291  unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
3292  SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
3293 
3294  SDValue Store = DAG.getStore(
3295  Val.getValue(1), DL, Val, FIN,
3296  MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
3297  MemOps.push_back(Store);
3298  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
3299  DAG.getConstant(16, DL, PtrVT));
3300  }
3301  }
3302  FuncInfo->setVarArgsFPRIndex(FPRIdx);
3303  FuncInfo->setVarArgsFPRSize(FPRSaveSize);
3304  }
3305 
3306  if (!MemOps.empty()) {
3307  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3308  }
3309 }
3310 
3311 /// LowerCallResult - Lower the result values of a call into the
3312 /// appropriate copies out of appropriate physical registers.
3313 SDValue AArch64TargetLowering::LowerCallResult(
3314  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3315  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3316  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
3317  SDValue ThisVal) const {
3318  CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3319  ? RetCC_AArch64_WebKit_JS
3320  : RetCC_AArch64_AAPCS;
3321  // Assign locations to each value returned by this call.
3322  SmallVector<CCValAssign, 16> RVLocs;
3323  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3324  *DAG.getContext());
3325  CCInfo.AnalyzeCallResult(Ins, RetCC);
3326 
3327  // Copy all of the result registers out of their specified physreg.
3328  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3329  CCValAssign VA = RVLocs[i];
3330 
3331  // Pass 'this' value directly from the argument to return value, to avoid
3332  // reg unit interference
3333  if (i == 0 && isThisReturn) {
3334  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
3335  "unexpected return calling convention register assignment");
3336  InVals.push_back(ThisVal);
3337  continue;
3338  }
3339 
3340  SDValue Val =
3341  DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
3342  Chain = Val.getValue(1);
3343  InFlag = Val.getValue(2);
3344 
3345  switch (VA.getLocInfo()) {
3346  default:
3347  llvm_unreachable("Unknown loc info!");
3348  case CCValAssign::Full:
3349  break;
3350  case CCValAssign::BCvt:
3351  Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3352  break;
3353  }
3354 
3355  InVals.push_back(Val);
3356  }
3357 
3358  return Chain;
3359 }
3360 
3361 /// Return true if the calling convention is one that we can guarantee TCO for.
3362 static bool canGuaranteeTCO(CallingConv::ID CC) {
3363  return CC == CallingConv::Fast;
3364 }
3365 
3366 /// Return true if we might ever do TCO for calls with this calling convention.
3367 static bool mayTailCallThisCC(CallingConv::ID CC) {
3368  switch (CC) {
3369  case CallingConv::C:
3370  case CallingConv::PreserveMost:
3371  case CallingConv::Swift:
3372  return true;
3373  default:
3374  return canGuaranteeTCO(CC);
3375  }
3376 }
3377 
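// Decides whether a call can be lowered as a tail call without changing the
// ABI: the calling conventions, stack argument usage and preserved registers
// of caller and callee must be compatible.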
3378 bool AArch64TargetLowering::isEligibleForTailCallOptimization(
3379  SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
3380  const SmallVectorImpl<ISD::OutputArg> &Outs,
3381  const SmallVectorImpl<SDValue> &OutVals,
3382  const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
3383  if (!mayTailCallThisCC(CalleeCC))
3384  return false;
3385 
3386  MachineFunction &MF = DAG.getMachineFunction();
3387  const Function &CallerF = MF.getFunction();
3388  CallingConv::ID CallerCC = CallerF.getCallingConv();
3389  bool CCMatch = CallerCC == CalleeCC;
3390 
3391  // Byval parameters hand the function a pointer directly into the stack area
3392  // we want to reuse during a tail call. Working around this *is* possible (see
3393  // X86) but less efficient and uglier in LowerCall.
3394  for (Function::const_arg_iterator i = CallerF.arg_begin(),
3395  e = CallerF.arg_end();
3396  i != e; ++i)
3397  if (i->hasByValAttr())
3398  return false;
3399 
3400  if (getTargetMachine().Options.GuaranteedTailCallOpt)
3401  return canGuaranteeTCO(CalleeCC) && CCMatch;
3402 
3403  // Externally-defined functions with weak linkage should not be
3404  // tail-called on AArch64 when the OS does not support dynamic
3405  // pre-emption of symbols, as the AAELF spec requires normal calls
3406  // to undefined weak functions to be replaced with a NOP or jump to the
3407  // next instruction. The behaviour of branch instructions in this
3408  // situation (as used for tail calls) is implementation-defined, so we
3409  // cannot rely on the linker replacing the tail call with a return.
3410  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3411  const GlobalValue *GV = G->getGlobal();
3412  const Triple &TT = getTargetMachine().getTargetTriple();
3413  if (GV->hasExternalWeakLinkage() &&
3414  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3415  return false;
3416  }
3417 
3418  // Now we search for cases where we can use a tail call without changing the
3419  // ABI. Sibcall is used in some places (particularly gcc) to refer to this
3420  // concept.
3421 
3422  // I want anyone implementing a new calling convention to think long and hard
3423  // about this assert.
3424  assert((!isVarArg || CalleeCC == CallingConv::C) &&
3425  "Unexpected variadic calling convention");
3426 
3427  LLVMContext &C = *DAG.getContext();
3428  if (isVarArg && !Outs.empty()) {
3429  // At least two cases here: if caller is fastcc then we can't have any
3430  // memory arguments (we'd be expected to clean up the stack afterwards). If
3431  // caller is C then we could potentially use its argument area.
3432 
3433  // FIXME: for now we take the most conservative of these in both cases:
3434  // disallow all variadic memory operands.
3435  SmallVector<CCValAssign, 16> ArgLocs;
3436  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3437 
3438  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
3439  for (const CCValAssign &ArgLoc : ArgLocs)
3440  if (!ArgLoc.isRegLoc())
3441  return false;
3442  }
3443 
3444  // Check that the call results are passed in the same way.
3445  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
3446  CCAssignFnForCall(CalleeCC, isVarArg),
3447  CCAssignFnForCall(CallerCC, isVarArg)))
3448  return false;
3449  // The callee has to preserve all registers the caller needs to preserve.
3450  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3451  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3452  if (!CCMatch) {
3453  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3454  if (Subtarget->hasCustomCallingConv()) {
3455  TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
3456  TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
3457  }
3458  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3459  return false;
3460  }
3461 
3462  // Nothing more to check if the callee is taking no arguments
3463  if (Outs.empty())
3464  return true;
3465 
3466  SmallVector<CCValAssign, 16> ArgLocs;
3467  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3468 
3469  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3470 
3471  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3472 
3473  // If the stack arguments for this call do not fit into our own save area then
3474  // the call cannot be made tail.
3475  if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
3476  return false;
3477 
3478  const MachineRegisterInfo &MRI = MF.getRegInfo();
3479  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3480  return false;
3481 
3482  return true;
3483 }
3484 
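// Builds a TokenFactor so that any pending loads from the given fixed stack
// slot complete before outgoing tail-call argument stores clobber it.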
3485 SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
3486  SelectionDAG &DAG,
3487  MachineFrameInfo &MFI,
3488  int ClobberedFI) const {
3489  SmallVector<SDValue, 8> ArgChains;
3490  int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
3491  int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
3492 
3493  // Include the original chain at the beginning of the list. When this is
3494  // used by target LowerCall hooks, this helps legalize find the
3495  // CALLSEQ_BEGIN node.
3496  ArgChains.push_back(Chain);
3497 
3498  // Add a chain value for each stack argument corresponding
3499  for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
3500  UE = DAG.getEntryNode().getNode()->use_end();
3501  U != UE; ++U)
3502  if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
3503  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
3504  if (FI->getIndex() < 0) {
3505  int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
3506  int64_t InLastByte = InFirstByte;
3507  InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
3508 
3509  if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
3510  (FirstByte <= InFirstByte && InFirstByte <= LastByte))
3511  ArgChains.push_back(SDValue(L, 1));
3512  }
3513 
3514  // Build a tokenfactor for all the chains.
3515  return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
3516 }
3517 
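// Returns true when the callee is expected to pop its own stack arguments,
// which only happens for fastcc when GuaranteedTailCallOpt is enabled.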
3518 bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
3519  bool TailCallOpt) const {
3520  return CallCC == CallingConv::Fast && TailCallOpt;
3521 }
3522 
3523 /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
3524 /// and add input and output parameter nodes.
3525 SDValue
3526 AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
3527  SmallVectorImpl<SDValue> &InVals) const {
3528  SelectionDAG &DAG = CLI.DAG;
3529  SDLoc &DL = CLI.DL;
3530  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
3531  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
3532  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
3533  SDValue Chain = CLI.Chain;
3534  SDValue Callee = CLI.Callee;
3535  bool &IsTailCall = CLI.IsTailCall;
3536  CallingConv::ID CallConv = CLI.CallConv;
3537  bool IsVarArg = CLI.IsVarArg;
3538 
3539  MachineFunction &MF = DAG.getMachineFunction();
3540  bool IsThisReturn = false;
3541 
3542  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3543  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3544  bool IsSibCall = false;
3545 
3546  if (IsTailCall) {
3547  // Check if it's really possible to do a tail call.
3548  IsTailCall = isEligibleForTailCallOptimization(
3549  Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
3550  if (!IsTailCall && CLI.CS && CLI.CS.isMustTailCall())
3551  report_fatal_error("failed to perform tail call elimination on a call "
3552  "site marked musttail");
3553 
3554  // A sibling call is one where we're under the usual C ABI and not planning
3555  // to change that but can still do a tail call:
3556  if (!TailCallOpt && IsTailCall)
3557  IsSibCall = true;
3558 
3559  if (IsTailCall)
3560  ++NumTailCalls;
3561  }
3562 
3563  // Analyze operands of the call, assigning locations to each operand.
3564  SmallVector<CCValAssign, 16> ArgLocs;
3565  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
3566  *DAG.getContext());
3567 
3568  if (IsVarArg) {
3569  // Handle fixed and variable vector arguments differently.
3570  // Variable vector arguments always go into memory.
3571  unsigned NumArgs = Outs.size();
3572 
3573  for (unsigned i = 0; i != NumArgs; ++i) {
3574  MVT ArgVT = Outs[i].VT;
3575  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3576  CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
3577  /*IsVarArg=*/ !Outs[i].IsFixed);
3578  bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
3579  assert(!Res && "Call operand has unhandled type");
3580  (void)Res;
3581  }
3582  } else {
3583  // At this point, Outs[].VT may already be promoted to i32. To correctly
3584  // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3585  // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3586  // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
3587  // we use a special version of AnalyzeCallOperands to pass in ValVT and
3588  // LocVT.
3589  unsigned NumArgs = Outs.size();
3590  for (unsigned i = 0; i != NumArgs; ++i) {
3591  MVT ValVT = Outs[i].VT;
3592  // Get type of the original argument.
3593  EVT ActualVT = getValueType(DAG.getDataLayout(),
3594  CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
3595  /*AllowUnknown*/ true);
3596  MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
3597  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3598  // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3599  if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3600  ValVT = MVT::i8;
3601  else if (ActualMVT == MVT::i16)
3602  ValVT = MVT::i16;
3603 
3604  CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3605  bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
3606  assert(!Res && "Call operand has unhandled type");
3607  (void)Res;
3608  }
3609  }
3610 
3611  // Get a count of how many bytes are to be pushed on the stack.
3612  unsigned NumBytes = CCInfo.getNextStackOffset();
3613 
3614  if (IsSibCall) {
3615  // Since we're not changing the ABI to make this a tail call, the memory
3616  // operands are already available in the caller's incoming argument space.
3617  NumBytes = 0;
3618  }
3619 
3620  // FPDiff is the byte offset of the call's argument area from the callee's.
3621  // Stores to callee stack arguments will be placed in FixedStackSlots offset
3622  // by this amount for a tail call. In a sibling call it must be 0 because the
3623  // caller will deallocate the entire stack and the callee still expects its
3624  // arguments to begin at SP+0. Completely unused for non-tail calls.
3625  int FPDiff = 0;
3626 
3627  if (IsTailCall && !IsSibCall) {
3628  unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
3629 
3630  // Since callee will pop argument stack as a tail call, we must keep the
3631  // popped size 16-byte aligned.
3632  NumBytes = alignTo(NumBytes, 16);
3633 
3634  // FPDiff will be negative if this tail call requires more space than we
3635  // would automatically have in our incoming argument space. Positive if we
3636  // can actually shrink the stack.
3637  FPDiff = NumReusableBytes - NumBytes;
3638 
3639  // The stack pointer must be 16-byte aligned at all times it's used for a
3640  // memory operation, which in practice means at *all* times and in
3641  // particular across call boundaries. Therefore our own arguments started at
3642  // a 16-byte aligned SP and the delta applied for the tail call should
3643  // satisfy the same constraint.
3644  assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
3645  }
3646 
3647  // Adjust the stack pointer for the new arguments...
3648  // These operations are automatically eliminated by the prolog/epilog pass
3649  if (!IsSibCall)
3650  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
3651 
3652  SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
3653  getPointerTy(DAG.getDataLayout()));
3654 
3655  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3656  SmallVector<SDValue, 8> MemOpChains;
3657  auto PtrVT = getPointerTy(DAG.getDataLayout());
3658 
3659  if (IsVarArg && CLI.CS && CLI.CS.isMustTailCall()) {
3660  const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
3661  for (const auto &F : Forwards) {
3662  SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
3663  RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3664  }
3665  }
3666 
3667  // Walk the register/memloc assignments, inserting copies/loads.
3668  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
3669  ++i, ++realArgIdx) {
3670  CCValAssign &VA = ArgLocs[i];
3671  SDValue Arg = OutVals[realArgIdx];
3672  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3673 
3674  // Promote the value if needed.
3675  switch (VA.getLocInfo()) {
3676  default:
3677  llvm_unreachable("Unknown loc info!");
3678  case CCValAssign::Full:
3679  break;
3680  case CCValAssign::SExt:
3681  Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
3682  break;
3683  case CCValAssign::ZExt:
3684  Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3685  break;
3686  case CCValAssign::AExt:
3687  if (Outs[realArgIdx].ArgVT == MVT::i1) {
3688  // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
3689  Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3690  Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
3691  }
3692  Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
3693  break;
3694  case CCValAssign::BCvt:
3695  Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3696  break;
3697  case CCValAssign::FPExt:
3698  Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
3699  break;
3700  }
3701 
3702  if (VA.isRegLoc()) {
3703  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
3704  Outs[0].VT == MVT::i64) {
3705  assert(VA.getLocVT() == MVT::i64 &&
3706  "unexpected calling convention register assignment");
3707  assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
3708  "unexpected use of 'returned'");
3709  IsThisReturn = true;
3710  }
3711  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3712  } else {
3713  assert(VA.isMemLoc());
3714 
3715  SDValue DstAddr;
3716  MachinePointerInfo DstInfo;
3717 
3718  // FIXME: This works on big-endian for composite byvals, which are the
3719  // common case. It should also work for fundamental types too.
3720  uint32_t BEAlign = 0;
3721  unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
3722  : VA.getValVT().getSizeInBits();
3723  OpSize = (OpSize + 7) / 8;
3724  if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
3725  !Flags.isInConsecutiveRegs()) {
3726  if (OpSize < 8)
3727  BEAlign = 8 - OpSize;
3728  }
3729  unsigned LocMemOffset = VA.getLocMemOffset();
3730  int32_t Offset = LocMemOffset + BEAlign;
3731  SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3732  PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3733 
3734  if (IsTailCall) {
3735  Offset = Offset + FPDiff;
3736  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3737 
3738  DstAddr = DAG.getFrameIndex(FI, PtrVT);
3739  DstInfo =
3740  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
3741 
3742  // Make sure any stack arguments overlapping with where we're storing
3743  // are loaded before this eventual operation. Otherwise they'll be
3744  // clobbered.
3745  Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
3746  } else {
3747  SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3748 
3749  DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3750  DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
3751  LocMemOffset);
3752  }
3753 
3754  if (Outs[i].Flags.isByVal()) {
3755  SDValue SizeNode =
3756  DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
3757  SDValue Cpy = DAG.getMemcpy(
3758  Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
3759  /*isVol = */ false, /*AlwaysInline = */ false,
3760  /*isTailCall = */ false,
3761  DstInfo, MachinePointerInfo());
3762 
3763  MemOpChains.push_back(Cpy);
3764  } else {
3765  // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
3766  // promoted to a legal register type i32, we should truncate Arg back to
3767  // i1/i8/i16.
3768  if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
3769  VA.getValVT() == MVT::i16)
3770  Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
3771 
3772  SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
3773  MemOpChains.push_back(Store);
3774  }
3775  }
3776  }
3777 
3778  if (!MemOpChains.empty())
3779  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
3780 
3781  // Build a sequence of copy-to-reg nodes chained together with token chain
3782  // and flag operands which copy the outgoing args into the appropriate regs.
3783  SDValue InFlag;
3784  for (auto &RegToPass : RegsToPass) {
3785  Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
3786  RegToPass.second, InFlag);
3787  InFlag = Chain.getValue(1);
3788  }
3789 
3790  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
3791  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
3792  // node so that legalize doesn't hack it.
3793  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3794  auto GV = G->getGlobal();
3795  if (Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()) ==
3796  AArch64II::MO_GOT) {
3797  Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
3798  Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3799  } else if (Subtarget->isTargetCOFF() && GV->hasDLLImportStorageClass()) {
3800  assert(Subtarget->isTargetWindows() &&
3801  "Windows is the only supported COFF target");
3802  Callee = getGOT(G, DAG, AArch64II::MO_DLLIMPORT);
3803  } else {
3804  const GlobalValue *GV = G->getGlobal();
3805  Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
3806  }
3807  } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3808  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
3809  Subtarget->isTargetMachO()) {
3810  const char *Sym = S->getSymbol();
3811  Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
3812  Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3813  } else {
3814  const char *Sym = S->getSymbol();
3815  Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
3816  }
3817  }
3818 
3819  // We don't usually want to end the call-sequence here because we would tidy
3820  // the frame up *after* the call, however in the ABI-changing tail-call case
3821  // we've carefully laid out the parameters so that when sp is reset they'll be
3822  // in the correct location.
3823  if (IsTailCall && !IsSibCall) {
3824  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3825  DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
3826  InFlag = Chain.getValue(1);
3827  }
3828 
3829  std::vector<SDValue> Ops;
3830  Ops.push_back(Chain);
3831  Ops.push_back(Callee);
3832 
3833  if (IsTailCall) {
3834  // Each tail call may have to adjust the stack by a different amount, so
3835  // this information must travel along with the operation for eventual
3836  // consumption by emitEpilogue.
3837  Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
3838  }
3839 
3840  // Add argument registers to the end of the list so that they are known live
3841  // into the call.
3842  for (auto &RegToPass : RegsToPass)
3843  Ops.push_back(DAG.getRegister(RegToPass.first,
3844  RegToPass.second.getValueType()));
3845 
3846  // Add a register mask operand representing the call-preserved registers.
3847  const uint32_t *Mask;
3848  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3849  if (IsThisReturn) {
3850  // For 'this' returns, use the X0-preserving mask if applicable
3851  Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
3852  if (!Mask) {
3853  IsThisReturn = false;
3854  Mask = TRI->getCallPreservedMask(MF, CallConv);
3855  }
3856  } else
3857  Mask = TRI->getCallPreservedMask(MF, CallConv);
3858 
3859  if (Subtarget->hasCustomCallingConv())
3860  TRI->UpdateCustomCallPreservedMask(MF, &Mask);
3861 
3862  if (TRI->isAnyArgRegReserved(MF))
3863  TRI->emitReservedArgRegCallError(MF);
3864 
3865  assert(Mask && "Missing call preserved mask for calling convention");
3866  Ops.push_back(DAG.getRegisterMask(Mask));
3867 
3868  if (InFlag.getNode())
3869  Ops.push_back(InFlag);
3870 
3871  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3872 
3873  // If we're doing a tail call, use a TC_RETURN here rather than an
3874  // actual call instruction.
3875  if (IsTailCall) {
3876  MF.getFrameInfo().setHasTailCall();
3877  return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
3878  }
3879 
3880  // Returns a chain and a flag for retval copy to use.
3881  Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
3882  InFlag = Chain.getValue(1);
3883 
3884  uint64_t CalleePopBytes =
3885  DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
3886 
3887  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3888  DAG.getIntPtrConstant(CalleePopBytes, DL, true),
3889  InFlag, DL);
3890  if (!Ins.empty())
3891  InFlag = Chain.getValue(1);
3892 
3893  // Handle result values, copying them out of physregs into vregs that we
3894  // return.
3895  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
3896  InVals, IsThisReturn,
3897  IsThisReturn ? OutVals[0] : SDValue());
3898 }
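// A minimal standalone sketch (an illustration with invented names, not code
// used by the lowering above) of the FPDiff arithmetic for ABI-changing tail
// calls: the callee's 16-byte-aligned argument area is compared against the
// bytes already reserved in the caller's incoming argument space.
static int computeFPDiffSketch(unsigned BytesInStackArgArea,
                               unsigned CalleeArgBytes) {
  unsigned NumReusableBytes = BytesInStackArgArea;   // caller's own argument area
  unsigned NumBytes = (CalleeArgBytes + 15) & ~15u;  // alignTo(NumBytes, 16)
  // Negative: the tail call needs more stack than the incoming area provides.
  // Positive: the popped area can actually shrink.
  return static_cast<int>(NumReusableBytes) - static_cast<int>(NumBytes);
}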
3899 
3900 bool AArch64TargetLowering::CanLowerReturn(
3901  CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
3902  const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3903  CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3904  ? RetCC_AArch64_WebKit_JS
3905  : RetCC_AArch64_AAPCS;
3906  SmallVector<CCValAssign, 16> RVLocs;
3907  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3908  return CCInfo.CheckReturn(Outs, RetCC);
3909 }
3910 
3911 SDValue
3912 AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3913  bool isVarArg,
3914  const SmallVectorImpl<ISD::OutputArg> &Outs,
3915  const SmallVectorImpl<SDValue> &OutVals,
3916  const SDLoc &DL, SelectionDAG &DAG) const {
3917  CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3918  ? RetCC_AArch64_WebKit_JS
3919  : RetCC_AArch64_AAPCS;
3920  SmallVector<CCValAssign, 16> RVLocs;
3921  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3922  *DAG.getContext());
3923  CCInfo.AnalyzeReturn(Outs, RetCC);
3924 
3925  // Copy the result values into the output registers.
3926  SDValue Flag;
3927  SmallVector<SDValue, 4> RetOps(1, Chain);
3928  for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
3929  ++i, ++realRVLocIdx) {
3930  CCValAssign &VA = RVLocs[i];
3931  assert(VA.isRegLoc() && "Can only return in registers!");
3932  SDValue Arg = OutVals[realRVLocIdx];
3933 
3934  switch (VA.getLocInfo()) {
3935  default:
3936  llvm_unreachable("Unknown loc info!");
3937  case CCValAssign::Full:
3938  if (Outs[i].ArgVT == MVT::i1) {
3939  // AAPCS requires i1 to be zero-extended to i8 by the producer of the
3940  // value. This is strictly redundant on Darwin (which uses "zeroext
3941  // i1"), but will be optimised out before ISel.
3942  Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3943  Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3944  }
3945  break;
3946  case CCValAssign::BCvt:
3947  Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3948  break;
3949  }
3950 
3951  Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
3952  Flag = Chain.getValue(1);
3953  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3954  }
3955  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3956  const MCPhysReg *I =
3957  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3958  if (I) {
3959  for (; *I; ++I) {
3960  if (AArch64::GPR64RegClass.contains(*I))
3961  RetOps.push_back(DAG.getRegister(*I, MVT::i64));
3962  else if (AArch64::FPR64RegClass.contains(*I))
3963  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3964  else
3965  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3966  }
3967  }
3968 
3969  RetOps[0] = Chain; // Update chain.
3970 
3971  // Add the flag if we have it.
3972  if (Flag.getNode())
3973  RetOps.push_back(Flag);
3974 
3975  return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
3976 }
3977 
3978 //===----------------------------------------------------------------------===//
3979 // Other Lowering Code
3980 //===----------------------------------------------------------------------===//
3981 
3982 SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
3983  SelectionDAG &DAG,
3984  unsigned Flag) const {
3985  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
3986  N->getOffset(), Flag);
3987 }
3988 
3989 SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
3990  SelectionDAG &DAG,
3991  unsigned Flag) const {
3992  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
3993 }
3994 
3995 SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
3996  SelectionDAG &DAG,
3997  unsigned Flag) const {
3998  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
3999  N->getOffset(), Flag);
4000 }
4001 
4002 SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
4003  SelectionDAG &DAG,
4004  unsigned Flag) const {
4005  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
4006 }
4007 
4008 // (loadGOT sym)
4009 template <class NodeTy>
4010 SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
4011  unsigned Flags) const {
4012  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
4013  SDLoc DL(N);
4014  EVT Ty = getPointerTy(DAG.getDataLayout());
4015  SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
4016  // FIXME: Once remat is capable of dealing with instructions with register
4017  // operands, expand this into two nodes instead of using a wrapper node.
4018  return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
4019 }
4020 
4021 // (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
4022 template <class NodeTy>
4023 SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
4024  unsigned Flags) const {
4025  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
4026  SDLoc DL(N);
4027  EVT Ty = getPointerTy(DAG.getDataLayout());
4028  const unsigned char MO_NC = AArch64II::MO_NC;
4029  return DAG.getNode(
4030  AArch64ISD::WrapperLarge, DL, Ty,
4031  getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
4032  getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
4033  getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
4034  getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
4035 }
4036 
4037 // (addlow (adrp %hi(sym)) %lo(sym))
4038 template <class NodeTy>
4039 SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
4040  unsigned Flags) const {
4041  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
4042  SDLoc DL(N);
4043  EVT Ty = getPointerTy(DAG.getDataLayout());
4044  SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
4045  SDValue Lo = getTargetNode(N, Ty, DAG,
4046  AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
4047  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
4048  return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
4049 }
4050 
4051 // (adr sym)
4052 template <class NodeTy>
4053 SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
4054  unsigned Flags) const {
4055  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
4056  SDLoc DL(N);
4057  EVT Ty = getPointerTy(DAG.getDataLayout());
4058  SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
4059  return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
4060 }
4061 
4062 SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
4063  SelectionDAG &DAG) const {
4064  GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
4065  const GlobalValue *GV = GN->getGlobal();
4066  unsigned char OpFlags =
4067  Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
4068 
4069  if (OpFlags != AArch64II::MO_NO_FLAG)
4070  assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
4071  "unexpected offset in global node");
4072 
4073  // This also catches the large code model case for Darwin, and tiny code
4074  // model with got relocations.
4075  if ((OpFlags & AArch64II::MO_GOT) != 0) {
4076  return getGOT(GN, DAG, OpFlags);
4077  }
4078 
4079  SDValue Result;
4080  if (getTargetMachine().getCodeModel() == CodeModel::Large) {
4081  Result = getAddrLarge(GN, DAG, OpFlags);
4082  } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
4083  Result = getAddrTiny(GN, DAG, OpFlags);
4084  } else {
4085  Result = getAddr(GN, DAG, OpFlags);
4086  }
4087  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4088  SDLoc DL(GN);
4089  if (GV->hasDLLImportStorageClass())
4090  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4091  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4092  return Result;
4093 }
4094 
4095 /// Convert a TLS address reference into the correct sequence of loads
4096 /// and calls to compute the variable's address (for Darwin, currently) and
4097 /// return an SDValue containing the final node.
4098 
4099 /// Darwin only has one TLS scheme which must be capable of dealing with the
4100 /// fully general situation, in the worst case. This means:
4101 /// + "extern __thread" declaration.
4102 /// + Defined in a possibly unknown dynamic library.
4103 ///
4104 /// The general system is that each __thread variable has a [3 x i64] descriptor
4105 /// which contains information used by the runtime to calculate the address. The
4106 /// only part of this the compiler needs to know about is the first xword, which
4107 /// contains a function pointer that must be called with the address of the
4108 /// entire descriptor in "x0".
4109 ///
4110 /// Since this descriptor may be in a different unit, in general even the
4111 /// descriptor must be accessed via an indirect load. The "ideal" code sequence
4112 /// is:
4113 /// adrp x0, _var@TLVPPAGE
4114 /// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
4115 /// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
4116 /// ; the function pointer
4117 /// blr x1 ; Uses descriptor address in x0
4118 /// ; Address of _var is now in x0.
4119 ///
4120 /// If the address of _var's descriptor *is* known to the linker, then it can
4121 /// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
4122 /// a slight efficiency gain.
4123 SDValue
4124 AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
4125  SelectionDAG &DAG) const {
4126  assert(Subtarget->isTargetDarwin() &&
4127  "This function expects a Darwin target");
4128 
4129  SDLoc DL(Op);
4130  MVT PtrVT = getPointerTy(DAG.getDataLayout());
4131  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4132 
4133  SDValue TLVPAddr =
4134  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4135  SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
4136 
4137  // The first entry in the descriptor is a function pointer that we must call
4138  // to obtain the address of the variable.
4139  SDValue Chain = DAG.getEntryNode();
4140  SDValue FuncTLVGet = DAG.getLoad(
4141  MVT::i64, DL, Chain, DescAddr,
4142  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
4143  /* Alignment = */ 8,
4144  MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant |
4145  MachineMemOperand::MODereferenceable);
4146  Chain = FuncTLVGet.getValue(1);
4147 
4149  MFI.setAdjustsStack(true);
4150 
4151  // TLS calls preserve all registers except those that absolutely must be
4152  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
4153  // silly).
4154  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
4155  const uint32_t *Mask = TRI->getTLSCallPreservedMask();
4156  if (Subtarget->hasCustomCallingConv())
4157  TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
4158 
4159  // Finally, we can make the call. This is just a degenerate version of a
4160  // normal AArch64 call node: x0 takes the address of the descriptor, and
4161  // returns the address of the variable in this thread.
4162  Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
4163  Chain =
4164  DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
4165  Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
4166  DAG.getRegisterMask(Mask), Chain.getValue(1));
4167  return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
4168 }
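// Illustrative sketch of the [3 x i64] per-variable descriptor described in
// the comment above, written out as a struct. The field names and the meaning
// of the trailing entries are assumptions based on that description; only the
// first entry (a resolver called with the descriptor's own address in x0) is
// relied upon by this lowering. Uses <cstdint>, already included above.
struct DarwinTLVDescriptorSketch {
  void *(*Thunk)(DarwinTLVDescriptorSketch *); // entry 0: resolver function pointer
  uint64_t Key;                                // entry 1: runtime-private (assumed)
  uint64_t Offset;                             // entry 2: runtime-private (assumed)
};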
4169 
4170 /// When accessing thread-local variables under either the general-dynamic or
4171 /// local-dynamic system, we make a "TLS-descriptor" call. The variable will
4172 /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
4173 /// is a function pointer to carry out the resolution.
4174 ///
4175 /// The sequence is:
4176 /// adrp x0, :tlsdesc:var
4177 /// ldr x1, [x0, #:tlsdesc_lo12:var]
4178 /// add x0, x0, #:tlsdesc_lo12:var
4179 /// .tlsdesccall var
4180 /// blr x1
4181 /// (TPIDR_EL0 offset now in x0)
4182 ///
4183 /// The above sequence must be produced unscheduled, to enable the linker to
4184 /// optimize/relax this sequence.
4185 /// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
4186 /// above sequence, and expanded really late in the compilation flow, to ensure
4187 /// the sequence is produced as per above.
4188 SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
4189  const SDLoc &DL,
4190  SelectionDAG &DAG) const {
4191  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4192 
4193  SDValue Chain = DAG.getEntryNode();
4194  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4195 
4196  Chain =
4197  DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
4198  SDValue Glue = Chain.getValue(1);
4199 
4200  return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
4201 }
4202 
4203 SDValue
4204 AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
4205  SelectionDAG &DAG) const {
4206  assert(Subtarget->isTargetELF() && "This function expects an ELF target");
4207  if (getTargetMachine().getCodeModel() == CodeModel::Large)
4208  report_fatal_error("ELF TLS only supported in small memory model");
4209  // Different choices can be made for the maximum size of the TLS area for a
4210  // module. For the small address model, the default TLS size is 16MiB and the
4211  // maximum TLS size is 4GiB.
4212  // FIXME: add -mtls-size command line option and make it control the 16MiB
4213  // vs. 4GiB code sequence generation.
4214  // FIXME: add tiny codemodel support. We currently generate the same code as
4215  // small, which may be larger than needed.
4216  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4217 
4218  TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
4219 
4220  if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
4221  if (Model == TLSModel::LocalDynamic)
4222  Model = TLSModel::GeneralDynamic;
4223  }
4224 
4225  SDValue TPOff;
4226  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4227  SDLoc DL(Op);
4228  const GlobalValue *GV = GA->getGlobal();
4229 
4230  SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
4231 
4232  if (Model == TLSModel::LocalExec) {
4233  SDValue HiVar = DAG.getTargetGlobalAddress(
4234  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4235  SDValue LoVar = DAG.getTargetGlobalAddress(
4236  GV, DL, PtrVT, 0,
4237  AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4238 
4239  SDValue TPWithOff_lo =
4240  SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
4241  HiVar,
4242  DAG.getTargetConstant(0, DL, MVT::i32)),
4243  0);
4244  SDValue TPWithOff =
4245  SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
4246  LoVar,
4247  DAG.getTargetConstant(0, DL, MVT::i32)),
4248  0);
4249  return TPWithOff;
4250  } else if (Model == TLSModel::InitialExec) {
4251  TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4252  TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
4253  } else if (Model == TLSModel::LocalDynamic) {
4254  // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
4255  // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
4256  // the beginning of the module's TLS region, followed by a DTPREL offset
4257  // calculation.
4258 
4259  // These accesses will need deduplicating if there's more than one.
4260  AArch64FunctionInfo *MFI =
4262  DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4263  MFI->incNumLocalDynamicTLSAccesses();
4264  // The call needs a relocation too for linker relaxation. It doesn't make
4265  // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4266  // the address.
4267  SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
4268  AArch64II::MO_TLS);
4269 
4270  // Now we can calculate the offset from TPIDR_EL0 to this module's
4271  // thread-local area.
4272  TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
4273 
4274  // Now use :dtprel_whatever: operations to calculate this variable's offset
4275  // in its thread-storage area.
4276  SDValue HiVar = DAG.getTargetGlobalAddress(
4277  GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4278  SDValue LoVar = DAG.getTargetGlobalAddress(
4279  GV, DL, MVT::i64, 0,
4280  AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4281 
4282  TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
4283  DAG.getTargetConstant(0, DL, MVT::i32)),
4284  0);
4285  TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
4286  DAG.getTargetConstant(0, DL, MVT::i32)),
4287  0);
4288  } else if (Model == TLSModel::GeneralDynamic) {
4289  // The call needs a relocation too for linker relaxation. It doesn't make
4290  // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4291  // the address.
4292  SDValue SymAddr =
4293  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4294 
4295  // Finally we can make a call to calculate the offset from tpidr_el0.
4296  TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
4297  } else
4298  llvm_unreachable("Unsupported ELF TLS access model");
4299 
4300  return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
4301 }
4302 
4303 SDValue
4304 AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
4305  SelectionDAG &DAG) const {
4306  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
4307 
4308  SDValue Chain = DAG.getEntryNode();
4309  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4310  SDLoc DL(Op);
4311 
4312  SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
4313 
4314  // Load the ThreadLocalStoragePointer from the TEB
4315  // A pointer to the TLS array is located at offset 0x58 from the TEB.
4316  SDValue TLSArray =
4317  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
4318  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
4319  Chain = TLSArray.getValue(1);
4320 
4321  // Load the TLS index from the C runtime;
4322  // This does the same as getAddr(), but without having a GlobalAddressSDNode.
4323  // This also does the same as LOADgot, but using a generic i32 load,
4324  // while LOADgot only loads i64.
4325  SDValue TLSIndexHi =
4326  DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
4327  SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
4328  "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4329  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
4330  SDValue TLSIndex =
4331  DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
4332  TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
4333  Chain = TLSIndex.getValue(1);
4334 
4335  // The pointer to the thread's TLS data area is at the TLS Index scaled by 8
4336  // offset into the TLSArray.
4337  TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
4338  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
4339  DAG.getConstant(3, DL, PtrVT));
4340  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
4341  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
4342  MachinePointerInfo());
4343  Chain = TLS.getValue(1);
4344 
4345  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4346  const GlobalValue *GV = GA->getGlobal();
4347  SDValue TGAHi = DAG.getTargetGlobalAddress(
4348  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4349  SDValue TGALo = DAG.getTargetGlobalAddress(
4350  GV, DL, PtrVT, 0,
4351  AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4352 
4353  // Add the offset from the start of the .tls section (section base).
4354  SDValue Addr =
4355  SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
4356  DAG.getTargetConstant(0, DL, MVT::i32)),
4357  0);
4358  Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
4359  return Addr;
4360 }
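// Illustrative scalar sketch (an invented helper, not LLVM API) of the address
// computation lowered above: fetch the TLS array pointer stored at TEB+0x58,
// index it by _tls_index, then add the variable's offset from the .tls base.
static char *windowsTLSAddressSketch(char *TEB, uint32_t TlsIndex,
                                     uint64_t SecRelOffset) {
  char **TLSArray = *reinterpret_cast<char ***>(TEB + 0x58); // TEB+0x58 slot
  char *ThreadData = TLSArray[TlsIndex];                     // 8-byte scaled index
  return ThreadData + SecRelOffset;                          // hi12 + lo12 offset
}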
4361 
4362 SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
4363  SelectionDAG &DAG) const {
4364  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4365  if (DAG.getTarget().useEmulatedTLS())
4366  return LowerToTLSEmulatedModel(GA, DAG);
4367 
4368  if (Subtarget->isTargetDarwin())
4369  return LowerDarwinGlobalTLSAddress(Op, DAG);
4370  if (Subtarget->isTargetELF())
4371  return LowerELFGlobalTLSAddress(Op, DAG);
4372  if (Subtarget->isTargetWindows())
4373  return LowerWindowsGlobalTLSAddress(Op, DAG);
4374 
4375  llvm_unreachable("Unexpected platform trying to use TLS");
4376 }
4377 
4378 SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
4379  SDValue Chain = Op.getOperand(0);
4380  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4381  SDValue LHS = Op.getOperand(2);
4382  SDValue RHS = Op.getOperand(3);
4383  SDValue Dest = Op.getOperand(4);
4384  SDLoc dl(Op);
4385 
4386  MachineFunction &MF = DAG.getMachineFunction();
4387  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
4388  // will not be produced, as they are conditional branch instructions that do
4389  // not set flags.
4390  bool ProduceNonFlagSettingCondBr =
4391  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
4392 
4393  // Handle f128 first, since lowering it will result in comparing the return
4394  // value of a libcall against zero, which is just what the rest of LowerBR_CC
4395  // is expecting to deal with.
4396  if (LHS.getValueType() == MVT::f128) {
4397  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4398 
4399  // If softenSetCCOperands returned a scalar, we need to compare the result
4400  // against zero to select between true and false values.
4401  if (!RHS.getNode()) {
4402  RHS = DAG.getConstant(0, dl, LHS.getValueType());
4403  CC = ISD::SETNE;
4404  }
4405  }
4406 
4407  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
4408  // instruction.
4409  if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
4410  (CC == ISD::SETEQ || CC == ISD::SETNE)) {
4411  // Only lower legal XALUO ops.
4412  if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
4413  return SDValue();
4414 
4415  // The actual operation with overflow check.
4416  AArch64CC::CondCode OFCC;
4417  SDValue Value, Overflow;
4418  std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
4419 
4420  if (CC == ISD::SETNE)
4421  OFCC = getInvertedCondCode(OFCC);
4422  SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
4423 
4424  return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4425  Overflow);
4426  }
4427 
4428  if (LHS.getValueType().isInteger()) {
4429  assert((LHS.getValueType() == RHS.getValueType()) &&
4430  (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4431 
4432  // If the RHS of the comparison is zero, we can potentially fold this
4433  // to a specialized branch.
4434  const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
4435  if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
4436  if (CC == ISD::SETEQ) {
4437  // See if we can use a TBZ to fold in an AND as well.
4438  // TBZ has a smaller branch displacement than CBZ. If the offset is
4439  // out of bounds, a late MI-layer pass rewrites branches.
4440  // 403.gcc is an example that hits this case.
4441  if (LHS.getOpcode() == ISD::AND &&
4442  isa<ConstantSDNode>(LHS.getOperand(1)) &&
4443  isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4444  SDValue Test = LHS.getOperand(0);
4445  uint64_t Mask = LHS.getConstantOperandVal(1);
4446  return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
4447  DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4448  Dest);
4449  }
4450 
4451  return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
4452  } else if (CC == ISD::SETNE) {
4453  // See if we can use a TBZ to fold in an AND as well.
4454  // TBZ has a smaller branch displacement than CBZ. If the offset is
4455  // out of bounds, a late MI-layer pass rewrites branches.
4456  // 403.gcc is an example that hits this case.
4457  if (LHS.getOpcode() == ISD::AND &&
4458  isa<ConstantSDNode>(LHS.getOperand(1)) &&
4459  isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4460  SDValue Test = LHS.getOperand(0);
4461  uint64_t Mask = LHS.getConstantOperandVal(1);
4462  return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
4463  DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4464  Dest);
4465  }
4466 
4467  return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
4468  } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
4469  // Don't combine AND since emitComparison converts the AND to an ANDS
4470  // (a.k.a. TST) and the test in the test bit and branch instruction
4471  // becomes redundant. This would also increase register pressure.
4472  uint64_t Mask = LHS.getValueSizeInBits() - 1;
4473  return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
4474  DAG.getConstant(Mask, dl, MVT::i64), Dest);
4475  }
4476  }
4477  if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
4478  LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
4479  // Don't combine AND since emitComparison converts the AND to an ANDS
4480  // (a.k.a. TST) and the test in the test bit and branch instruction
4481  // becomes redundant. This would also increase register pressure.
4482  uint64_t Mask = LHS.getValueSizeInBits() - 1;
4483  return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
4484  DAG.getConstant(Mask, dl, MVT::i64), Dest);
4485  }
4486 
4487  SDValue CCVal;
4488  SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4489  return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4490  Cmp);
4491  }
4492 
4493  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4494  LHS.getValueType() == MVT::f64);
4495 
4496  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4497  // clean. Some of them require two branches to implement.
4498  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4499  AArch64CC::CondCode CC1, CC2;
4500  changeFPCCToAArch64CC(CC, CC1, CC2);
4501  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4502  SDValue BR1 =
4503  DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
4504  if (CC2 != AArch64CC::AL) {
4505  SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4506  return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
4507  Cmp);
4508  }
4509 
4510  return BR1;
4511 }
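// Illustrative sketch (invented helper) of the TBZ/TBNZ folding condition used
// above: "(x & Mask) ==/!= 0" can become a test-bit branch only when Mask has
// a single bit set, in which case the tested bit index is Log2_64(Mask).
static bool canFoldAndIntoTestBitSketch(uint64_t Mask, unsigned &BitOut) {
  if (Mask == 0 || (Mask & (Mask - 1)) != 0)
    return false;              // not a power of two: more than one bit tested
  unsigned Bit = 0;
  while ((Mask >> Bit) != 1)
    ++Bit;                     // equivalent to Log2_64(Mask)
  BitOut = Bit;
  return true;
}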
4512 
4513 SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
4514  SelectionDAG &DAG) const {
4515  EVT VT = Op.getValueType();
4516  SDLoc DL(Op);
4517 
4518  SDValue In1 = Op.getOperand(0);
4519  SDValue In2 = Op.getOperand(1);
4520  EVT SrcVT = In2.getValueType();
4521 
4522  if (SrcVT.bitsLT(VT))
4523  In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
4524  else if (SrcVT.bitsGT(VT))
4525  In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
4526 
4527  EVT VecVT;
4528  uint64_t EltMask;
4529  SDValue VecVal1, VecVal2;
4530 
4531  auto setVecVal = [&] (int Idx) {
4532  if (!VT.isVector()) {
4533  VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4534  DAG.getUNDEF(VecVT), In1);
4535  VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4536  DAG.getUNDEF(VecVT), In2);
4537  } else {
4538  VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
4539  VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
4540  }
4541  };
4542 
4543  if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
4544  VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
4545  EltMask = 0x80000000ULL;
4546  setVecVal(AArch64::ssub);
4547  } else if (VT == MVT::f64 || VT == MVT::v2f64) {
4548  VecVT = MVT::v2i64;
4549 
4550  // We want to materialize a mask with the high bit set, but the AdvSIMD
4551  // immediate moves cannot materialize that in a single instruction for
4552  // 64-bit elements. Instead, materialize zero and then negate it.
4553  EltMask = 0;
4554 
4555  setVecVal(AArch64::dsub);
4556  } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
4557  VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
4558  EltMask = 0x8000ULL;
4559  setVecVal(AArch64::hsub);
4560  } else {
4561  llvm_unreachable("Invalid type for copysign!");
4562  }
4563 
4564  SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
4565 
4566  // If we couldn't materialize the mask above, then the mask vector will be
4567  // the zero vector, and we need to negate it here.
4568  if (VT == MVT::f64 || VT == MVT::v2f64) {
4569  BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
4570  BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
4571  BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
4572  }
4573 
4574  SDValue Sel =
4575  DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
4576 
4577  if (VT == MVT::f16)
4578  return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
4579  if (VT == MVT::f32)
4580  return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
4581  else if (VT == MVT::f64)
4582  return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
4583  else
4584  return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
4585 }
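// Illustrative scalar sketch (invented helper) of the bit-select performed by
// AArch64ISD::BIT above for the f32 case, assuming the 0x80000000 element
// mask: magnitude bits come from the first operand, the sign bit from the
// second.
static uint32_t copysignBitsSketch(uint32_t MagnitudeBits, uint32_t SignBits) {
  const uint32_t EltMask = 0x80000000u;                     // sign bit only
  return (MagnitudeBits & ~EltMask) | (SignBits & EltMask); // bitwise insert if true
}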
4586 
4587 SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
4588  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
4589  Attribute::NoImplicitFloat))
4590  return SDValue();
4591 
4592  if (!Subtarget->hasNEON())
4593  return SDValue();
4594 
4595  // While there is no integer popcount instruction, it can
4596  // be more efficiently lowered to the following sequence that uses
4597  // AdvSIMD registers/instructions as long as the copies to/from
4598  // the AdvSIMD registers are cheap.
4599  // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
4600  // CNT V0.8B, V0.8B // 8xbyte pop-counts
4601  // ADDV B0, V0.8B // sum 8xbyte pop-counts
4602  // UMOV X0, V0.B[0] // copy byte result back to integer reg
4603  SDValue Val = Op.getOperand(0);
4604  SDLoc DL(Op);
4605  EVT VT = Op.getValueType();
4606 
4607  if (VT == MVT::i32 || VT == MVT::i64) {
4608  if (VT == MVT::i32)
4609  Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
4610  Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
4611 
4612  SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
4613  SDValue UaddLV = DAG.getNode(
4614  ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
4615  DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
4616 
4617  if (VT == MVT::i64)
4618  UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
4619  return UaddLV;
4620  }
4621 
4622  assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
4623  VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
4624  "Unexpected type for custom ctpop lowering");
4625 
4626  EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
4627  Val = DAG.getBitcast(VT8Bit, Val);
4628  Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
4629 
4630  // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
4631  unsigned EltSize = 8;
4632  unsigned NumElts = VT.is64BitVector() ? 8 : 16;
4633  while (EltSize != VT.getScalarSizeInBits()) {
4634  EltSize *= 2;
4635  NumElts /= 2;
4636  MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
4637  Val = DAG.getNode(
4638  ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
4639  DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
4640  }
4641 
4642  return Val;
4643 }
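// Illustrative scalar sketch (invented helper) of the strategy above for a
// 64-bit scalar: count the set bits within each byte, then horizontally sum
// the eight byte counts, which CNT followed by UADDLV/ADDV does in one step each.
static unsigned ctpop64Sketch(uint64_t X) {
  unsigned Sum = 0;
  for (int I = 0; I != 8; ++I) {
    uint8_t Byte = (X >> (8 * I)) & 0xff; // one lane of the v8i8 CNT
    unsigned ByteCount = 0;
    for (; Byte; Byte &= Byte - 1)        // clear the lowest set bit
      ++ByteCount;
    Sum += ByteCount;                     // horizontal add across lanes
  }
  return Sum;
}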
4644 
4645 SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
4646 
4647  if (Op.getValueType().isVector())
4648  return LowerVSETCC(Op, DAG);
4649 
4650  SDValue LHS = Op.getOperand(0);
4651  SDValue RHS = Op.getOperand(1);
4652  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
4653  SDLoc dl(Op);
4654 
4655  // We chose ZeroOrOneBooleanContents, so use zero and one.
4656  EVT VT = Op.getValueType();
4657  SDValue TVal = DAG.getConstant(1, dl, VT);
4658  SDValue FVal = DAG.getConstant(0, dl, VT);
4659 
4660  // Handle f128 first, since one possible outcome is a normal integer
4661  // comparison which gets picked up by the next if statement.
4662  if (LHS.getValueType() == MVT::f128) {
4663  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4664 
4665  // If softenSetCCOperands returned a scalar, use it.
4666  if (!RHS.getNode()) {
4667  assert(LHS.getValueType() == Op.getValueType() &&
4668  "Unexpected setcc expansion!");
4669  return LHS;
4670  }
4671  }
4672 
4673  if (LHS.getValueType().isInteger()) {
4674  SDValue CCVal;
4675  SDValue Cmp =
4676  getAArch64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl);
4677 
4678  // Note that we inverted the condition above, so we reverse the order of
4679  // the true and false operands here. This will allow the setcc to be
4680  // matched to a single CSINC instruction.
4681  return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
4682  }
4683 
4684  // Now we know we're dealing with FP values.
4685  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4686  LHS.getValueType() == MVT::f64);
4687 
4688  // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
4689  // and do the comparison.
4690  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4691 
4692  AArch64CC::CondCode CC1, CC2;
4693  changeFPCCToAArch64CC(CC, CC1, CC2);
4694  if (CC2 == AArch64CC::AL) {
4695  changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
4696  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4697 
4698  // Note that we inverted the condition above, so we reverse the order of
4699  // the true and false operands here. This will allow the setcc to be
4700  // matched to a single CSINC instruction.
4701  return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
4702  } else {
4703  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
4704  // totally clean. Some of them require two CSELs to implement. As is in
4705  // this case, we emit the first CSEL and then emit a second using the output
4706  // of the first as the RHS. We're effectively OR'ing the two CC's together.
4707 
4708  // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
4709  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4710  SDValue CS1 =
4711  DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
4712 
4713  SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4714  return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
4715  }
4716 }
4717 
4718 SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
4719  SDValue RHS, SDValue TVal,
4720  SDValue FVal, const SDLoc &dl,
4721  SelectionDAG &DAG) const {
4722  // Handle f128 first, because it will result in a comparison of some RTLIB
4723  // call result against zero.
4724  if (LHS.getValueType() == MVT::f128) {
4725  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4726 
4727  // If softenSetCCOperands returned a scalar, we need to compare the result
4728  // against zero to select between true and false values.
4729  if (!RHS.getNode()) {
4730  RHS = DAG.getConstant(0, dl, LHS.getValueType());
4731  CC = ISD::SETNE;
4732  }
4733  }
4734 
4735  // Also handle f16, for which we need to do a f32 comparison.
4736  if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
4737  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
4738  RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
4739  }
4740 
4741  // Next, handle integers.
4742  if (LHS.getValueType().isInteger()) {
4743  assert((LHS.getValueType() == RHS.getValueType()) &&
4744  (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4745 
4746  unsigned Opcode = AArch64ISD::CSEL;
4747 
4748  // If both the TVal and the FVal are constants, see if we can swap them in
4749  // order to form a CSINV or CSINC out of them.
4750  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
4751  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
4752 
4753  if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
4754  std::swap(TVal, FVal);
4755  std::swap(CTVal, CFVal);
4756  CC = ISD::getSetCCInverse(CC, true);
4757  } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
4758  std::swap(TVal, FVal);
4759  std::swap(CTVal, CFVal);
4760  CC = ISD::getSetCCInverse(CC, true);
4761  } else if (TVal.getOpcode() == ISD::XOR) {
4762  // If TVal is a NOT we want to swap TVal and FVal so that we can match
4763  // with a CSINV rather than a CSEL.
4764  if (isAllOnesConstant(TVal.getOperand(1))) {
4765  std::swap(TVal, FVal);
4766  std::swap(CTVal, CFVal);
4767  CC = ISD::getSetCCInverse(CC, true);
4768  }
4769  } else if (TVal.getOpcode() == ISD::SUB) {
4770  // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
4771  // that we can match with a CSNEG rather than a CSEL.
4772  if (isNullConstant(TVal.getOperand(0))) {
4773  std::swap(TVal, FVal);
4774  std::swap(CTVal, CFVal);
4775  CC = ISD::getSetCCInverse(CC, true);
4776  }
4777  } else if (CTVal && CFVal) {
4778  const int64_t TrueVal = CTVal->getSExtValue();
4779  const int64_t FalseVal = CFVal->getSExtValue();
4780  bool Swap = false;
4781 
4782  // If both TVal and FVal are constants, see if FVal is the
4783  // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
4784  // instead of a CSEL in that case.
4785  if (TrueVal == ~FalseVal) {
4786  Opcode = AArch64ISD::CSINV;
4787  } else if (TrueVal == -FalseVal) {
4788  Opcode = AArch64ISD::CSNEG;
4789  } else if (TVal.getValueType() == MVT::i32) {
4790  // If our operands are only 32-bit wide, make sure we use 32-bit
4791  // arithmetic for the check whether we can use CSINC. This ensures that
4792  // the addition in the check will wrap around properly in case there is
4793  // an overflow (which would not be the case if we do the check with
4794  // 64-bit arithmetic).
4795  const uint32_t TrueVal32 = CTVal->getZExtValue();
4796  const uint32_t FalseVal32 = CFVal->getZExtValue();
4797 
4798  if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
4799  Opcode = AArch64ISD::CSINC;
4800 
4801  if (TrueVal32 > FalseVal32) {
4802  Swap = true;
4803  }
4804  }
4805  // 64-bit check whether we can use CSINC.
4806  } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
4807  Opcode = AArch64ISD::CSINC;
4808 
4809  if (TrueVal > FalseVal) {
4810  Swap = true;
4811  }
4812  }
4813 
4814  // Swap TVal and FVal if necessary.
4815  if (Swap) {
4816  std::swap(TVal, FVal);
4817  std::swap(CTVal, CFVal);
4818  CC = ISD::getSetCCInverse(CC, true);
4819  }
4820 
4821  if (Opcode != AArch64ISD::CSEL) {
4822  // Drop FVal since we can get its value by simply inverting/negating
4823  // TVal.
4824  FVal = TVal;
4825  }
4826  }
4827 
4828  // Avoid materializing a constant when possible by reusing a known value in
4829  // a register. However, don't perform this optimization if the known value
4830  // is one, zero or negative one in the case of a CSEL. We can always
4831  // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
4832  // FVal, respectively.
4833  ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
4834  if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
4835  !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
4836  AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4837  // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
4838  // "a != C ? x : a" to avoid materializing C.
4839  if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
4840  TVal = LHS;
4841  else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
4842  FVal = LHS;
4843  } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
4844  assert (CTVal && CFVal && "Expected constant operands for CSNEG.");
4845  // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
4846  // avoid materializing C.
4847  AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4848  if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
4849  Opcode = AArch64ISD::CSINV;
4850  TVal = LHS;
4851  FVal = DAG.getConstant(0, dl, FVal.getValueType());
4852  }
4853  }
4854 
4855  SDValue CCVal;
4856  SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4857  EVT VT = TVal.getValueType();
4858  return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
4859  }
4860 
4861  // Now we know we're dealing with FP values.
4862  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4863  LHS.getValueType() == MVT::f64);
4864  assert(LHS.getValueType() == RHS.getValueType());
4865  EVT VT = TVal.getValueType();
4866  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4867 
4868  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4869  // clean. Some of them require two CSELs to implement.
4870  AArch64CC::CondCode CC1, CC2;
4871  changeFPCCToAArch64CC(CC, CC1, CC2);
4872 
4873  if (DAG.getTarget().Options.UnsafeFPMath) {
4874  // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
4875  // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
4876  ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
4877  if (RHSVal && RHSVal->isZero()) {
4878  ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
4879  ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
4880 
4881  if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
4882  CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
4883  TVal = LHS;
4884  else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
4885  CFVal && CFVal->isZero() &&
4886  FVal.getValueType() == LHS.getValueType())
4887  FVal = LHS;
4888  }
4889  }
4890 
4891  // Emit first, and possibly only, CSEL.
4892  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4893  SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
4894 
4895  // If we need a second CSEL, emit it, using the output of the first as the
4896  // RHS. We're effectively OR'ing the two CC's together.
4897  if (CC2 != AArch64CC::AL) {
4898  SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4899  return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
4900  }
4901 
4902  // Otherwise, return the output of the first CSEL.
4903  return CS1;
4904 }
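// Compact sketch (invented helper) of the constant-pair classification above,
// assuming 64-bit constants and ignoring the operand-swap and 32-bit wraparound
// handling: it mirrors the checks that prefer CSINV, CSNEG or CSINC over CSEL.
enum class CondSelKindSketch { CSEL, CSINV, CSNEG, CSINC };
static CondSelKindSketch classifyCondSelectSketch(int64_t TrueVal,
                                                  int64_t FalseVal) {
  if (TrueVal == ~FalseVal)
    return CondSelKindSketch::CSINV; // FVal is the bitwise NOT of TVal
  if (TrueVal == -FalseVal)
    return CondSelKindSketch::CSNEG; // FVal is the negation of TVal
  if (TrueVal == FalseVal + 1 || TrueVal + 1 == FalseVal)
    return CondSelKindSketch::CSINC; // the two values differ by exactly one
  return CondSelKindSketch::CSEL;
}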
4905 
4906 SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
4907  SelectionDAG &DAG) const {
4908  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4909  SDValue LHS = Op.getOperand(0);
4910  SDValue RHS = Op.getOperand(1);
4911  SDValue TVal = Op.getOperand(2);
4912  SDValue FVal = Op.getOperand(3);
4913  SDLoc DL(Op);
4914  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4915 }
4916 
4917 SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
4918  SelectionDAG &DAG) const {
4919  SDValue CCVal = Op->getOperand(0);
4920  SDValue TVal = Op->getOperand(1);
4921  SDValue FVal = Op->getOperand(2);
4922  SDLoc DL(Op);
4923 
4924  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
4925  // instruction.
4926  if (isOverflowIntrOpRes(CCVal)) {
4927  // Only lower legal XALUO ops.
4928  if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
4929  return SDValue();
4930 
4931  AArch64CC::CondCode OFCC;
4932  SDValue Value, Overflow;
4933  std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
4934  SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
4935 
4936  return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
4937  CCVal, Overflow);
4938  }
4939 
4940  // Lower it the same way as we would lower a SELECT_CC node.
4941  ISD::CondCode CC;
4942  SDValue LHS, RHS;
4943  if (CCVal.getOpcode() == ISD::SETCC) {
4944  LHS = CCVal.getOperand(0);
4945  RHS = CCVal.getOperand(1);
4946  CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
4947  } else {
4948  LHS = CCVal;
4949  RHS = DAG.getConstant(0, DL, CCVal.getValueType());
4950  CC = ISD::SETNE;
4951  }
4952  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4953 }
4954 
4955 SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
4956  SelectionDAG &DAG) const {
4957  // Jump table entries as PC relative offsets. No additional tweaking
4958  // is necessary here. Just get the address of the jump table.
4959  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
4960 
4961  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4962  !Subtarget->isTargetMachO()) {
4963  return getAddrLarge(JT, DAG);
4964  } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
4965  return getAddrTiny(JT, DAG);
4966  }
4967  return getAddr(JT, DAG);
4968 }
4969 
4970 SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
4971  SelectionDAG &DAG) const {
4972  // Jump table entries as PC relative offsets. No additional tweaking
4973  // is necessary here. Just get the address of the jump table.
4974  SDLoc DL(Op);
4975  SDValue JT = Op.getOperand(1);
4976  SDValue Entry = Op.getOperand(2);
4977  int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
4978 
4979  SDNode *Dest =
4980  DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
4981  Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
4982  return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
4983  SDValue(Dest, 0));
4984 }
4985 
4986 SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
4987  SelectionDAG &DAG) const {
4988  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
4989 
4990  if (getTargetMachine().getCodeModel() == CodeModel::Large) {
4991  // Use the GOT for the large code model on iOS.
4992  if (Subtarget->isTargetMachO()) {
4993  return getGOT(CP, DAG);
4994  }
4995  return getAddrLarge(CP, DAG);
4996  } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
4997  return getAddrTiny(CP, DAG);
4998  } else {
4999  return getAddr(CP, DAG);
5000  }
5001 }
5002 
5003 SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
5004  SelectionDAG &DAG) const {
5005  BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
5006  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
5007  !Subtarget->isTargetMachO()) {
5008  return getAddrLarge(BA, DAG);
5009  } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
5010  return getAddrTiny(BA, DAG);
5011  }
5012  return getAddr(BA, DAG);
5013 }
5014 
5015 SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
5016  SelectionDAG &DAG) const {
5017  AArch64FunctionInfo *FuncInfo =
5018  DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
5019 
5020  SDLoc DL(Op);
5021  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
5022  getPointerTy(DAG.getDataLayout()));
5023  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5024  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
5025  MachinePointerInfo(SV));
5026 }
5027 
5028 SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
5029  SelectionDAG &DAG) const {
5030  AArch64FunctionInfo *FuncInfo =
5031  DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
5032 
5033  SDLoc DL(Op);
5034  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
5035  ? FuncInfo->getVarArgsGPRIndex()
5036  : FuncInfo->getVarArgsStackIndex(),
5037  getPointerTy(DAG.getDataLayout()));
5038  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5039  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
5040  MachinePointerInfo(SV));
5041 }
5042 
5043 SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
5044  SelectionDAG &DAG) const {
5045  // The layout of the va_list struct is specified in the AArch64 Procedure Call
5046  // Standard, section B.3.
5047  MachineFunction &MF = DAG.getMachineFunction();
5048  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5049  auto PtrVT = getPointerTy(DAG.getDataLayout());
5050  SDLoc DL(Op);
5051 
5052  SDValue Chain = Op.getOperand(0);
5053  SDValue VAList = Op.getOperand(1);
5054  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5055  SmallVector<SDValue, 4> MemOps;
5056 
5057  // void *__stack at offset 0
5058  SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
5059  MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
5060  MachinePointerInfo(SV), /* Alignment = */ 8));
5061 
5062  // void *__gr_top at offset 8
5063  int GPRSize = FuncInfo->getVarArgsGPRSize();
5064  if (GPRSize > 0) {
5065  SDValue GRTop, GRTopAddr;
5066 
5067  GRTopAddr =
5068  DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
5069 
5070  GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
5071  GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
5072  DAG.getConstant(GPRSize, DL, PtrVT));
5073 
5074  MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
5075  MachinePointerInfo(SV, 8),
5076  /* Alignment = */ 8));
5077  }
5078 
5079  // void *__vr_top at offset 16
5080  int FPRSize = FuncInfo->getVarArgsFPRSize();
5081  if (FPRSize > 0) {
5082  SDValue VRTop, VRTopAddr;
5083  VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5084  DAG.getConstant(16, DL, PtrVT));
5085 
5086  VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
5087  VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
5088  DAG.getConstant(FPRSize, DL, PtrVT));
5089 
5090  MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
5091  MachinePointerInfo(SV, 16),
5092  /* Alignment = */ 8));
5093  }
5094 
5095  // int __gr_offs at offset 24
5096  SDValue GROffsAddr =
5097  DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
5098  MemOps.push_back(DAG.getStore(
5099  Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr,
5100  MachinePointerInfo(SV, 24), /* Alignment = */ 4));
5101 
5102  // int __vr_offs at offset 28
5103  SDValue VROffsAddr =
5104  DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
5105  MemOps.push_back(DAG.getStore(
5106  Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr,
5107  MachinePointerInfo(SV, 28), /* Alignment = */ 4));
5108 
5109  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
5110 }
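
// For reference, a rough sketch of the va_list object this lowering fills in,
// using the field names and byte offsets from the stores above (AAPCS64 B.3).
// The authoritative definition is whatever the C library headers declare;
// this layout is shown only as an illustration.
//
//   struct aapcs64_va_list {
//     void *__stack;   // offset 0:  next stacked argument
//     void *__gr_top;  // offset 8:  one past the GP register save area
//     void *__vr_top;  // offset 16: one past the FP/SIMD register save area
//     int   __gr_offs; // offset 24: initialized to -GPRSize above
//     int   __vr_offs; // offset 28: initialized to -FPRSize above
//   };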
5111 
5112 SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
5113  SelectionDAG &DAG) const {
5114  MachineFunction &MF = DAG.getMachineFunction();
5115 
5116  if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
5117  return LowerWin64_VASTART(Op, DAG);
5118  else if (Subtarget->isTargetDarwin())
5119  return LowerDarwin_VASTART(Op, DAG);
5120  else
5121  return LowerAAPCS_VASTART(Op, DAG);
5122 }
5123 
5124 SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
5125  SelectionDAG &DAG) const {
5126  // AAPCS has three pointers and two ints (= 32 bytes); Darwin and Windows
5127  // have a single pointer (8 bytes).
5128  SDLoc DL(Op);
5129  unsigned VaListSize =
5130  Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32;
5131  const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
5132  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
5133 
5134  return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1),
5135  Op.getOperand(2),
5136  DAG.getConstant(VaListSize, DL, MVT::i32),
5137  8, false, false, false, MachinePointerInfo(DestSV),
5138  MachinePointerInfo(SrcSV));
5139 }
5140 
5141 SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
5142  assert(Subtarget->isTargetDarwin() &&
5143  "automatic va_arg instruction only works on Darwin");
5144 
5145  const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5146  EVT VT = Op.getValueType();
5147  SDLoc DL(Op);
5148  SDValue Chain = Op.getOperand(0);
5149  SDValue Addr = Op.getOperand(1);
5150  unsigned Align = Op.getConstantOperandVal(3);
5151  auto PtrVT = getPointerTy(DAG.getDataLayout());
5152 
5153  SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V));
5154  Chain = VAList.getValue(1);
5155 
5156  if (Align > 8) {
5157  assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
5158  VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5159  DAG.getConstant(Align - 1, DL, PtrVT));
5160  VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
5161  DAG.getConstant(-(int64_t)Align, DL, PtrVT));
5162  }
5163 
5164  Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
5165  uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
5166 
5167  // Scalar integer and FP values smaller than 64 bits are implicitly extended
5168  // up to 64 bits. At the very least, we have to increase the striding of the
5169  // vaargs list to match this, and for FP values we need to introduce
5170  // FP_ROUND nodes as well.
5171  if (VT.isInteger() && !VT.isVector())
5172  ArgSize = 8;
5173  bool NeedFPTrunc = false;
5174  if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
5175  ArgSize = 8;
5176  NeedFPTrunc = true;
5177  }
5178 
5179  // Increment the pointer, VAList, to the next vaarg
5180  SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5181  DAG.getConstant(ArgSize, DL, PtrVT));
5182  // Store the incremented VAList to the legalized pointer
5183  SDValue APStore =
5184  DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
5185 
5186  // Load the actual argument out of the pointer VAList
5187  if (NeedFPTrunc) {
5188  // Load the value as an f64.
5189  SDValue WideFP =
5190  DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
5191  // Round the value down to an f32.
5192  SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
5193  DAG.getIntPtrConstant(1, DL));
5194  SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
5195  // Merge the rounded value with the chain output of the load.
5196  return DAG.getMergeValues(Ops, DL);
5197  }
5198 
5199  return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
5200 }
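
// A minimal scalar model of the over-alignment step above: round the loaded
// va_list pointer up to the requested power-of-two alignment before using it.
// Hypothetical helper, shown only to make the ADD/AND pair explicit.
static inline uint64_t roundUpVAListAddr(uint64_t Addr, uint64_t Align) {
  assert((Align & (Align - 1)) == 0 && "Expected Align to be a power of 2");
  // e.g. roundUpVAListAddr(0x1004, 16) == 0x1010
  return (Addr + Align - 1) & -Align;
}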
5201 
5202 SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
5203  SelectionDAG &DAG) const {
5204  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5205  MFI.setFrameAddressIsTaken(true);
5206 
5207  EVT VT = Op.getValueType();
5208  SDLoc DL(Op);
5209  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5210  SDValue FrameAddr =
5211  DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
5212  while (Depth--)
5213  FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
5214  MachinePointerInfo());
5215  return FrameAddr;
5216 }
5217 
5218 SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
5219  SelectionDAG &DAG) const {
5220  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5221 
5222  EVT VT = getPointerTy(DAG.getDataLayout());
5223  SDLoc DL(Op);
5224  int FI = MFI.CreateFixedObject(4, 0, false);
5225  return DAG.getFrameIndex(FI, VT);
5226 }
5227 
5228 // FIXME? Maybe this could be a TableGen attribute on some registers and
5229 // this table could be generated automatically from RegInfo.
5230 unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
5231  SelectionDAG &DAG) const {
5232  unsigned Reg = StringSwitch<unsigned>(RegName)
5233  .Case("sp", AArch64::SP)
5234  .Case("x1", AArch64::X1)
5235  .Case("w1", AArch64::W1)
5236  .Case("x2", AArch64::X2)
5237  .Case("w2", AArch64::W2)
5238  .Case("x3", AArch64::X3)
5239  .Case("w3", AArch64::W3)
5240  .Case("x4", AArch64::X4)
5241  .Case("w4", AArch64::W4)
5242  .Case("x5", AArch64::X5)
5243  .Case("w5", AArch64::W5)
5244  .Case("x6", AArch64::X6)
5245  .Case("w6", AArch64::W6)
5246  .Case("x7", AArch64::X7)
5247  .Case("w7", AArch64::W7)
5248  .Case("x18", AArch64::X18)
5249  .Case("w18", AArch64::W18)
5250  .Case("x20", AArch64::X20)
5251  .Case("w20", AArch64::W20)
5252  .Default(0);
5253  if (((Reg == AArch64::X1 || Reg == AArch64::W1) &&
5254  !Subtarget->isXRegisterReserved(1)) ||
5255  ((Reg == AArch64::X2 || Reg == AArch64::W2) &&
5256  !Subtarget->isXRegisterReserved(2)) ||
5257  ((Reg == AArch64::X3 || Reg == AArch64::W3) &&
5258  !Subtarget->isXRegisterReserved(3)) ||
5259  ((Reg == AArch64::X4 || Reg == AArch64::W4) &&
5260  !Subtarget->isXRegisterReserved(4)) ||
5261  ((Reg == AArch64::X5 || Reg == AArch64::W5) &&
5262  !Subtarget->isXRegisterReserved(5)) ||
5263  ((Reg == AArch64::X6 || Reg == AArch64::W6) &&
5264  !Subtarget->isXRegisterReserved(6)) ||
5265  ((Reg == AArch64::X7 || Reg == AArch64::W7) &&
5266  !Subtarget->isXRegisterReserved(7)) ||
5267  ((Reg == AArch64::X18 || Reg == AArch64::W18) &&
5268  !Subtarget->isXRegisterReserved(18)) ||
5269  ((Reg == AArch64::X20 || Reg == AArch64::W20) &&
5270  !Subtarget->isXRegisterReserved(20)))
5271  Reg = 0;
5272  if (Reg)
5273  return Reg;
5274  report_fatal_error(Twine("Invalid register name \""
5275  + StringRef(RegName) + "\"."));
5276 }
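
// How this hook is reached, roughly: named-register reads and writes such as
//
//   %sp = call i64 @llvm.read_register.i64(metadata !0)   ; !0 = !{!"sp"}
//
// or, at the C level, a global register variable like
//
//   register unsigned long canary asm("x18");   // needs -ffixed-x18
//
// translate the string into a physical register here. The x1-x7, x18 and x20
// cases above deliberately fail unless the register has been reserved,
// matching the -ffixed-xN requirement. (The IR/C snippets are illustrative;
// the exact metadata spelling depends on the frontend.)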
5277 
5278 SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
5279  SelectionDAG &DAG) const {
5280  DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
5281 
5282  EVT VT = Op.getValueType();
5283  SDLoc DL(Op);
5284 
5285  SDValue FrameAddr =
5286  DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
5287  SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
5288 
5289  return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
5290 }
5291 
5292 SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
5293  SelectionDAG &DAG) const {
5294  MachineFunction &MF = DAG.getMachineFunction();
5295  MachineFrameInfo &MFI = MF.getFrameInfo();
5296  MFI.setReturnAddressIsTaken(true);
5297 
5298  EVT VT = Op.getValueType();
5299  SDLoc DL(Op);
5300  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5301  if (Depth) {
5302  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
5303  SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
5304  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
5305  DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
5306  MachinePointerInfo());
5307  }
5308 
5309  // Return LR, which contains the return address. Mark it an implicit live-in.
5310  unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
5311  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
5312 }
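
// In source terms this implements __builtin_return_address(N): N == 0 copies
// LR directly, while N > 0 walks the frame chain and loads the saved LR from
// [FP + 8] of that frame (a sketch, assuming the usual FP/LR frame record):
//
//   void *caller         = __builtin_return_address(0);  // CopyFromReg of LR
//   void *callers_caller = __builtin_return_address(1);  // load from frame chain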
5313 
5314 /// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
5315 /// i64 values and take a 2 x i64 value to shift plus a shift amount.
5316 SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
5317  SelectionDAG &DAG) const {
5318  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5319  EVT VT = Op.getValueType();
5320  unsigned VTBits = VT.getSizeInBits();
5321  SDLoc dl(Op);
5322  SDValue ShOpLo = Op.getOperand(0);
5323  SDValue ShOpHi = Op.getOperand(1);
5324  SDValue ShAmt = Op.getOperand(2);
5325  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
5326 
5327  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
5328 
5329  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
5330  DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
5331  SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
5332 
5333  // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which
5334  // is "undef". We wanted 0, so CSEL it directly.
5335  SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
5336  ISD::SETEQ, dl, DAG);
5337  SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
5338  HiBitsForLo =
5339  DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
5340  HiBitsForLo, CCVal, Cmp);
5341 
5342  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
5343  DAG.getConstant(VTBits, dl, MVT::i64));
5344 
5345  SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
5346  SDValue LoForNormalShift =
5347  DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo);
5348 
5349  Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
5350  dl, DAG);
5351  CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
5352  SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
5353  SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
5354  LoForNormalShift, CCVal, Cmp);
5355 
5356  // AArch64 shifts larger than the register width are wrapped rather than
5357  // clamped, so we can't just emit "hi >> x".
5358  SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
5359  SDValue HiForBigShift =
5360  Opc == ISD::SRA
5361  ? DAG.getNode(Opc, dl, VT, ShOpHi,
5362  DAG.getConstant(VTBits - 1, dl, MVT::i64))
5363  : DAG.getConstant(0, dl, VT);
5364  SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
5365  HiForNormalShift, CCVal, Cmp);
5366 
5367  SDValue Ops[2] = { Lo, Hi };
5368  return DAG.getMergeValues(Ops, dl);
5369 }
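
// A scalar model of the decomposition above for the SRL_PARTS case: the low
// half of the result is either (lo >> amt) | (hi << (64 - amt)) or, for
// amt >= 64, hi >> (amt - 64); the CSELs pick between the two. Hypothetical
// helper, with the amount reduced so every C shift stays in range.
static inline void srl128Model(uint64_t &Lo, uint64_t &Hi, unsigned Amt) {
  Amt &= 127;
  if (Amt == 0)
    return;                                 // ShAmt == 0: HiBitsForLo is CSEL'd to 0
  if (Amt < 64) {
    Lo = (Lo >> Amt) | (Hi << (64 - Amt));  // LoForNormalShift
    Hi = Hi >> Amt;                         // HiForNormalShift
  } else {
    Lo = Hi >> (Amt - 64);                  // LoForBigShift
    Hi = 0;                                 // HiForBigShift (SRA would sign-fill)
  }
}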
5370 
5371 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
5372 /// i64 values and takes a 2 x i64 value to shift plus a shift amount.
5373 SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
5374  SelectionDAG &DAG) const {
5375  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5376  EVT VT = Op.getValueType();
5377  unsigned VTBits = VT.getSizeInBits();
5378  SDLoc dl(Op);
5379  SDValue ShOpLo = Op.getOperand(0);
5380  SDValue ShOpHi = Op.getOperand(1);
5381  SDValue ShAmt = Op.getOperand(2);
5382 
5383  assert(Op.getOpcode() == ISD::SHL_PARTS);
5384  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
5385  DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
5386  SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
5387 
5388  // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which
5389  // is "undef". We wanted 0, so CSEL it directly.
5390  SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
5391  ISD::SETEQ, dl, DAG);
5392  SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
5393  LoBitsForHi =
5394  DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
5395  LoBitsForHi, CCVal, Cmp);
5396 
5397  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
5398  DAG.getConstant(VTBits, dl, MVT::i64));
5399  SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
5400  SDValue HiForNormalShift =
5401  DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
5402 
5403  SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
5404 
5405  Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
5406  dl, DAG);
5407  CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
5408  SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
5409  HiForNormalShift, CCVal, Cmp);
5410 
6411  // AArch64 shifts larger than the register width are wrapped rather than
6412  // clamped, so we can't just emit "lo << a" if a is too big.
5413  SDValue LoForBigShift = DAG.getConstant(0, dl, VT);
5414  SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
5415  SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
5416  LoForNormalShift, CCVal, Cmp);
5417 
5418  SDValue Ops[2] = { Lo, Hi };
5419  return DAG.getMergeValues(Ops, dl);
5420 }
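
// The matching scalar model for SHL_PARTS: hi picks up the bits shifted out
// of lo, and shifts of 64 or more move lo (shifted by amt - 64) into hi while
// zeroing lo. Hypothetical helper mirroring srl128Model above.
static inline void shl128Model(uint64_t &Lo, uint64_t &Hi, unsigned Amt) {
  Amt &= 127;
  if (Amt == 0)
    return;                                 // ShAmt == 0: LoBitsForHi is CSEL'd to 0
  if (Amt < 64) {
    Hi = (Hi << Amt) | (Lo >> (64 - Amt));  // HiForNormalShift
    Lo = Lo << Amt;                         // LoForNormalShift
  } else {
    Hi = Lo << (Amt - 64);                  // HiForBigShift
    Lo = 0;                                 // LoForBigShift
  }
}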
5421 
5422 bool AArch64TargetLowering::isOffsetFoldingLegal(
5423  const GlobalAddressSDNode *GA) const {
5424  // Offsets are folded in the DAG combine rather than here so that we can
5425  // intelligently choose an offset based on the uses.
5426  return false;
5427 }
5428 
5429 bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
5430  // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
5431  // FIXME: We should be able to handle f128 as well with a clever lowering.
5432  if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
5433  (VT == MVT::f16 && Subtarget->hasFullFP16()))) {
5434  LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString() << " imm value: 0\n");
5435  return true;
5436  }
5437 
5438  bool IsLegal = false;
5439  SmallString<128> ImmStrVal;
5440  Imm.toString(ImmStrVal);
5441 
5442  if (VT == MVT::f64)
5443  IsLegal = AArch64_AM::getFP64Imm(Imm) != -1;
5444  else if (VT == MVT::f32)
5445  IsLegal = AArch64_AM::getFP32Imm(Imm) != -1;
5446  else if (VT == MVT::f16 && Subtarget->hasFullFP16())
5447  IsLegal = AArch64_AM::getFP16Imm(Imm) != -1;
5448 
5449  if (IsLegal) {
5450  LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString()
5451  << " imm value: " << ImmStrVal << "\n");
5452  return true;
5453  }
5454 
5455  LLVM_DEBUG(dbgs() << "Illegal " << VT.getEVTString()
5456  << " imm value: " << ImmStrVal << "\n");
5457  return false;
5458 }
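
// Roughly, the FMOV immediate encodings accept values of the form
// +/- (16 + m) / 16 * 2^e with m in [0, 15] and e in [-3, 4], so constants
// such as 1.0, 0.5, -2.5 and 31.0 are legal to materialize, while 0.1 or
// 1e10 fall back to a load or an integer move. (Approximate characterization;
// the getFP*Imm checks above are authoritative.)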
5459 
5460 //===----------------------------------------------------------------------===//
5461 // AArch64 Optimization Hooks
5462 //===----------------------------------------------------------------------===//
5463 
5464 static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
5465  SDValue Operand, SelectionDAG &DAG,
5466  int &ExtraSteps) {
5467  EVT VT = Operand.getValueType();
5468  if (ST->hasNEON() &&
5469  (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
5470  VT == MVT::f32 || VT == MVT::v1f32 ||
5471  VT == MVT::v2f32 || VT == MVT::v4f32)) {
5472  if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
5473  // For the reciprocal estimates, convergence is quadratic, so the number
5474  // of digits is doubled after each iteration. In ARMv8, the accuracy of
5475  // the initial estimate is 2^-8. Thus the number of extra steps to refine
5476  // the result for float (23 mantissa bits) is 2 and for double (52
5477  // mantissa bits) is 3.
5478  ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
5479 
5480  return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
5481  }
5482 
5483  return SDValue();
5484 }
5485 
5486 SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
5487  SelectionDAG &DAG, int Enabled,
5488  int &ExtraSteps,
5489  bool &UseOneConst,
5490  bool Reciprocal) const {
5491  if (Enabled == ReciprocalEstimate::Enabled ||
5492  (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
5493  if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
5494  DAG, ExtraSteps)) {
5495  SDLoc DL(Operand);
5496  EVT VT = Operand.getValueType();
5497 
5498  SDNodeFlags Flags;
5499  Flags.setAllowReassociation(true);
5500 
5501  // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
5502  // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
5503  for (int i = ExtraSteps; i > 0; --i) {
5504  SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
5505  Flags);
5506  Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
5507  Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
5508  }
5509  if (!Reciprocal) {
5510  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
5511  VT);
5512  SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
5513  SDValue Eq = DAG.getSetCC(DL, CCVT, Operand, FPZero, ISD::SETEQ);
5514 
5515  Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
5516  // Correct the result if the operand is 0.0.
5517  Estimate = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL,
5518  VT, Eq, Operand, Estimate);
5519  }
5520 
5521  ExtraSteps = 0;
5522  return Estimate;
5523  }
5524 
5525  return SDValue();
5526 }
5527 
5528 SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
5529  SelectionDAG &DAG, int Enabled,
5530  int &ExtraSteps) const {
5531  if (Enabled == ReciprocalEstimate::Enabled)
5532  if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
5533  DAG, ExtraSteps)) {
5534  SDLoc DL(Operand);
5535  EVT VT = Operand.getValueType();
5536 
5537  SDNodeFlags Flags;
5538  Flags.setAllowReassociation(true);
5539 
5540  // Newton reciprocal iteration: E * (2 - X * E)
5541  // AArch64 reciprocal iteration instruction: (2 - M * N)
5542  for (int i = ExtraSteps; i > 0; --i) {
5543  SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
5544  Estimate, Flags);
5545  Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
5546  }
5547 
5548  ExtraSteps = 0;
5549  return Estimate;
5550  }
5551 
5552  return SDValue();
5553 }
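
// Scalar sketches of the refinement loops above. FRSQRTS computes
// (3 - m*n) / 2 and FRECPS computes (2 - m*n); folding in the multiplies by
// the current estimate gives the classic Newton-Raphson updates. Illustrative
// only: the initial Estimate here stands in for FRSQRTE/FRECPE.
static inline double refineRSqrtModel(double X, double Estimate, int Steps) {
  for (int i = 0; i < Steps; ++i)
    Estimate = Estimate * 0.5 * (3.0 - X * Estimate * Estimate); // E*0.5*(3 - X*E^2)
  return Estimate;  // converges toward 1/sqrt(X)
}
static inline double refineRecipModel(double X, double Estimate, int Steps) {
  for (int i = 0; i < Steps; ++i)
    Estimate = Estimate * (2.0 - X * Estimate);                  // E*(2 - X*E)
  return Estimate;  // converges toward 1/X
}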
5554 
5555 //===----------------------------------------------------------------------===//
5556 // AArch64 Inline Assembly Support
5557 //===----------------------------------------------------------------------===//
5558 
5559 // Table of Constraints
5560 // TODO: This is the current set of constraints supported by ARM for the
5561 // compiler; not all of them may make sense.
5562 //
5563 // r - A general register
5564 // w - An FP/SIMD register of some size in the range v0-v31
5565 // x - An FP/SIMD register of some size in the range v0-v15
5566 // I - Constant that can be used with an ADD instruction
5567 // J - Constant that can be used with a SUB instruction
5568 // K - Constant that can be used with a 32-bit logical instruction
5569 // L - Constant that can be used with a 64-bit logical instruction
5570 // M - Constant that can be used as a 32-bit MOV immediate
5571 // N - Constant that can be used as a 64-bit MOV immediate
5572 // Q - A memory reference with base register and no offset
5573 // S - A symbolic address
5574 // Y - Floating point constant zero
5575 // Z - Integer constant zero
5576 //
5577 // Note that general register operands will be output using their 64-bit x
5578 // register name, whatever the size of the variable, unless the asm operand
5579 // is prefixed by the %w modifier. Floating-point and SIMD register operands
5580 // will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
5581 // %q modifier.
5582 const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5583  // At this point, we have to lower this constraint to something else, so we
5584  // lower it to an "r" or "w". However, by doing this we will force the result
5585  // to be in register, while the X constraint is much more permissive.
5586  //
5587  // Although we are correct (we are free to emit anything, without
5588  // constraints), we might break use cases that would expect us to be more
5589  // efficient and emit something else.
5590  if (!Subtarget->hasFPARMv8())
5591  return "r";
5592 
5593  if (ConstraintVT.isFloatingPoint())
5594  return "w";
5595 
5596  if (ConstraintVT.isVector() &&
5597  (ConstraintVT.getSizeInBits() == 64 ||
5598  ConstraintVT.getSizeInBits() == 128))
5599  return "w";
5600 
5601  return "r";
5602 }
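
// A couple of illustrative uses of the constraints from the table above
// (ordinary GCC-style inline asm, not taken from any particular test):
//
//   int sum;
//   asm("add %w0, %w1, %2" : "=r"(sum) : "r"(a), "I"(4095));  // 'I': ADD immediate
//
//   float f;
//   asm("fadd %s0, %s1, %s2" : "=w"(f) : "w"(x), "w"(y));     // 'w': FP/SIMD register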
5603 
5604 /// getConstraintType - Given a constraint letter, return the type of
5605 /// constraint it is for this target.
5606 AArch64TargetLowering::ConstraintType
5607 AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
5608  if (Constraint.size() == 1) {
5609  switch (Constraint[0]) {
5610  default:
5611  break;
5612  case 'z':
5613  return C_Other;
5614  case 'x':
5615  case 'w':
5616  return C_RegisterClass;
5617  // An address with a single base register. Due to the way we
5618  // currently handle addresses it is the same as 'r'.
5619  case 'Q':
5620  return C_Memory;
5621  case 'S': // A symbolic address
5622  return C_Other;
5623  }
5624  }
5625  return TargetLowering::getConstraintType(Constraint);
5626 }
5627 
5628 /// Examine constraint type and operand type and determine a weight value.
5629 /// This object must already have been set up with the operand type
5630 /// and the current alternative constraint selected.
5631 TargetLowering::ConstraintWeight
5632 AArch64TargetLowering::getSingleConstraintMatchWeight(
5633  AsmOperandInfo &info, const char *constraint) const {
5634  ConstraintWeight weight = CW_Invalid;
5635  Value *CallOperandVal = info.CallOperandVal;
5636  // If we don't have a value, we can't do a match,
5637  // but allow it at the lowest weight.
5638  if (!CallOperandVal)
5639  return CW_Default;
5640  Type *type = CallOperandVal->getType();
5641  // Look at the constraint type.
5642  switch (*constraint) {
5643  default:
5644  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
5645  break;
5646  case 'x':
5647  case 'w':
5648  if (type->isFloatingPointTy() || type->isVectorTy())
5649  weight = CW_Register;
5650  break;
5651  case 'z':
5652  weight = CW_Constant;
5653  break;
5654  }
5655  return weight;
5656 }
5657 
5658 std::pair<unsigned, const TargetRegisterClass *>
5659 AArch64TargetLowering::getRegForInlineAsmConstraint(
5660  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
5661  if (Constraint.size() == 1) {
5662  switch (Constraint[0]) {
5663  case 'r':
5664  if (VT.getSizeInBits() == 64)
5665  return std::make_pair(0U, &AArch64::GPR64commonRegClass);
5666  return std::make_pair(0U, &AArch64::GPR32commonRegClass);
5667  case 'w':
5668  if (!Subtarget->hasFPARMv8())
5669  break;
5670  if (VT.getSizeInBits() == 16)
5671  return std::make_pair(0U, &AArch64::FPR16RegClass);
5672  if (VT.getSizeInBits() == 32)
5673  return std::make_pair(0U, &AArch64::FPR32RegClass);
5674  if (VT.getSizeInBits() == 64)
5675  return std::make_pair(0U, &AArch64::FPR64RegClass);
5676  if (VT.getSizeInBits() == 128)
5677  return std::make_pair(0U, &AArch64::FPR128RegClass);
5678  break;
5679  // The instructions that this constraint is designed for can
5680  // only take 128-bit registers so just use that regclass.
5681  case 'x':
5682  if (!Subtarget->hasFPARMv8())
5683  break;
5684  if (VT.getSizeInBits() == 128)
5685  return std::make_pair(0U, &AArch64::FPR128_loRegClass);
5686  break;
5687  }
5688  }
5689  if (StringRef("{cc}").equals_lower(Constraint))
5690  return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
5691 
5692  // Use the default implementation in TargetLowering to convert the register
5693  // constraint into a member of a register class.
5694  std::pair<unsigned, const TargetRegisterClass *> Res;
5695  Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
5696 
5697  // Not found as a standard register?
5698  if (!Res.second) {
5699  unsigned Size = Constraint.size();
5700  if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
5701  tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
5702  int RegNo;
5703  bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
5704  if (!Failed && RegNo >= 0 && RegNo <= 31) {
5705  // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
5706  // By default we'll emit v0-v31 for this unless there's a modifier where
5707  // we'll emit the correct register as well.
5708  if (VT != MVT::Other && VT.getSizeInBits() == 64) {
5709  Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
5710  Res.second = &AArch64::FPR64RegClass;
5711  } else {
5712  Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
5713  Res.second = &AArch64::FPR128RegClass;
5714  }
5715  }
5716  }
5717  }
5718 
5719  if (Res.second && !Subtarget->hasFPARMv8() &&
5720  !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
5721  !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
5722  return std::make_pair(0U, nullptr);
5723 
5724  return Res;
5725 }
5726 
5727 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
5728 /// vector. If it is invalid, don't add anything to Ops.
5729 void AArch64TargetLowering::LowerAsmOperandForConstraint(
5730  SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
5731  SelectionDAG &DAG) const {
5732  SDValue Result;
5733 
5734  // Currently only support length 1 constraints.
5735  if (Constraint.length() != 1)
5736  return;
5737 
5738  char ConstraintLetter = Constraint[0];
5739  switch (ConstraintLetter) {
5740  default:
5741  break;
5742 
5743  // This set of constraints deals with valid constants for various instructions.
5744  // Validate and return a target constant for them if we can.
5745  case 'z': {
5746  // 'z' maps to xzr or wzr so it needs an input of 0.
5747  if (!isNullConstant(Op))
5748  return;
5749 
5750  if (Op.getValueType() == MVT::i64)
5751  Result = DAG.getRegister(AArch64::XZR, MVT::i64);
5752  else
5753  Result = DAG.getRegister(AArch64::WZR, MVT::i32);
5754  break;
5755  }
5756  case 'S': {
5757  // An absolute symbolic address or label reference.
5758  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5759  Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5760  GA->getValueType(0));
5761  } else if (const BlockAddressSDNode *BA =
5762  dyn_cast<BlockAddressSDNode>(Op)) {
5763  Result =
5764  DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0));
5765  } else if (const ExternalSymbolSDNode *ES =
5766  dyn_cast<ExternalSymbolSDNode>(Op)) {
5767  Result =
5768  DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0));
5769  } else
5770  return;
5771  break;
5772  }
5773 
5774  case 'I':
5775  case 'J':
5776  case 'K':
5777  case 'L':
5778  case 'M':
5779  case 'N':
5780  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5781  if (!C)
5782  return;
5783 
5784  // Grab the value and do some validation.
5785  uint64_t CVal = C->getZExtValue();
5786  switch (ConstraintLetter) {
5787  // The I constraint applies only to simple ADD or SUB immediate operands:
5788  // i.e. 0 to 4095 with optional shift by 12
5789  // The J constraint applies only to ADD or SUB immediates that would be
5790  // valid when negated, i.e. if [an add pattern] were to be output as a SUB
5791  // instruction [or vice versa], in other words -1 to -4095 with optional
5792  // left shift by 12.
5793  case 'I':
5794  if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
5795  break;
5796  return;
5797  case 'J': {
5798  uint64_t NVal = -C->getSExtValue();
5799  if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
5800  CVal = C->getSExtValue();
5801  break;
5802  }
5803  return;
5804  }
5805  // The K and L constraints apply *only* to logical immediates, including
5806  // what used to be the MOVI alias for ORR (though the MOVI alias has now
5807  // been removed and MOV should be used). So these constraints have to
5808  // distinguish between bit patterns that are valid 32-bit or 64-bit
5809  // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
5810  // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
5811  // versa.
5812  case 'K':
5813  if (AArch64_AM::isLogicalImmediate(CVal, 32))
5814  break;
5815  return;
5816  case 'L':
5817  if (AArch64_AM::isLogicalImmediate(CVal, 64))
5818  break;
5819  return;
5820  // The M and N constraints are a superset of K and L respectively, for use
5821  // with the MOV (immediate) alias. As well as the logical immediates they
5822  // also match 32 or 64-bit immediates that can be loaded either using a
5823  // *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
5824  // (M) or 64-bit 0x1234000000000000 (N) etc.
5825  // As a note some of this code is liberally stolen from the asm parser.
5826  case 'M': {
5827  if (!isUInt<32>(CVal))
5828  return;
5829  if (AArch64_AM::isLogicalImmediate(CVal, 32))
5830  break;
5831  if ((CVal & 0xFFFF) == CVal)
5832  break;
5833  if ((CVal & 0xFFFF0000ULL) == CVal)
5834  break;
5835  uint64_t NCVal = ~(uint32_t)CVal;
5836  if ((NCVal & 0xFFFFULL) == NCVal)
5837  break;
5838  if ((NCVal & 0xFFFF0000ULL) == NCVal)
5839  break;
5840  return;
5841  }
5842  case 'N': {
5843  if (AArch64_AM::isLogicalImmediate(CVal, 64))
5844  break;
5845  if ((CVal & 0xFFFFULL) == CVal)
5846  break;
5847  if ((CVal & 0xFFFF0000ULL) == CVal)
5848  break;
5849  if ((CVal & 0xFFFF00000000ULL) == CVal)
5850  break;
5851  if ((CVal & 0xFFFF000000000000ULL) == CVal)
5852  break;
5853  uint64_t NCVal = ~CVal;
5854  if ((NCVal & 0xFFFFULL) == NCVal)
5855  break;
5856  if ((NCVal & 0xFFFF0000ULL) == NCVal)
5857  break;
5858  if ((NCVal & 0xFFFF00000000ULL) == NCVal)
5859  break;
5860  if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
5861  break;
5862  return;
5863  }
5864  default:
5865  return;
5866  }
5867 
5868  // All assembler immediates are 64-bit integers.
5869  Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
5870  break;
5871  }
5872 
5873  if (Result.getNode()) {
5874  Ops.push_back(Result);
5875  return;
5876  }
5877 
5878  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5879 }
5880 
5881 //===----------------------------------------------------------------------===//
5882 // AArch64 Advanced SIMD Support
5883 //===----------------------------------------------------------------------===//
5884 
5885 /// WidenVector - Given a value in the V64 register class, produce the
5886 /// equivalent value in the V128 register class.
5887 static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
5888  EVT VT = V64Reg.getValueType();
5889  unsigned NarrowSize = VT.getVectorNumElements();
5890  MVT EltTy = VT.getVectorElementType().getSimpleVT();
5891  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
5892  SDLoc DL(V64Reg);
5893 
5894  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
5895  V64Reg, DAG.getConstant(0, DL, MVT::i32));
5896 }
5897 
5898 /// getExtFactor - Determine the adjustment factor for the position when
5899 /// generating an "extract from vector registers" instruction.
5900 static unsigned getExtFactor(SDValue &V) {
5901  EVT EltType = V.getValueType().getVectorElementType();
5902  return EltType.getSizeInBits() / 8;
5903 }
5904 
5905 /// NarrowVector - Given a value in the V128 register class, produce the
5906 /// equivalent value in the V64 register class.
5907 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
5908  EVT VT = V128Reg.getValueType();
5909  unsigned WideSize = VT.getVectorNumElements();
5910  MVT EltTy = VT.getVectorElementType().getSimpleVT();
5911  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
5912  SDLoc DL(V128Reg);
5913 
5914  return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
5915 }
5916 
5917 // Gather data to see if the operation can be modelled as a
5918 // shuffle in combination with VEXTs.
5919 SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
5920  SelectionDAG &DAG) const {
5921  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
5922  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
5923  SDLoc dl(Op);
5924  EVT VT = Op.getValueType();
5925  unsigned NumElts = VT.getVectorNumElements();
5926 
5927  struct ShuffleSourceInfo {
5928  SDValue Vec;
5929  unsigned MinElt;
5930  unsigned MaxElt;
5931 
5932  // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
5933  // be compatible with the shuffle we intend to construct. As a result
5934  // ShuffleVec will be some sliding window into the original Vec.
5935  SDValue ShuffleVec;
5936 
5937  // Code should guarantee that element i in Vec starts at element "WindowBase
5938  // + i * WindowScale in ShuffleVec".
5939  int WindowBase;
5940  int WindowScale;
5941 
5942  ShuffleSourceInfo(SDValue Vec)
5943  : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
5944  ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
5945 
5946  bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
5947  };
5948 
5949  // First gather all vectors used as an immediate source for this BUILD_VECTOR
5950  // node.
5951  SmallVector<ShuffleSourceInfo, 2> Sources;
5952  for (unsigned i = 0; i < NumElts; ++i) {
5953  SDValue V = Op.getOperand(i);
5954  if (V.isUndef())
5955  continue;
5956  else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5957  !isa<ConstantSDNode>(V.getOperand(1))) {
5958  LLVM_DEBUG(
5959  dbgs() << "Reshuffle failed: "
5960  "a shuffle can only come from building a vector from "
5961  "various elements of other vectors, provided their "
5962  "indices are constant\n");
5963  return SDValue();
5964  }
5965 
5966  // Add this element source to the list if it's not already there.
5967  SDValue SourceVec = V.getOperand(0);
5968  auto Source = find(Sources, SourceVec);
5969  if (Source == Sources.end())
5970  Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
5971 
5972  // Update the minimum and maximum lane number seen.
5973  unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
5974  Source->MinElt = std::min(Source->MinElt, EltNo);
5975  Source->MaxElt = std::max(Source->MaxElt, EltNo);
5976  }
5977 
5978  if (Sources.size() > 2) {
5979  LLVM_DEBUG(
5980  dbgs() << "Reshuffle failed: currently only do something sane when at "
5981  "most two source vectors are involved\n");
5982  return SDValue();
5983  }
5984 
5985  // Find out the smallest element size among result and two sources, and use
5986  // it as element size to build the shuffle_vector.
5987  EVT SmallestEltTy = VT.getVectorElementType();
5988  for (auto &Source : Sources) {
5989  EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
5990  if (SrcEltTy.bitsLT(SmallestEltTy)) {
5991  SmallestEltTy = SrcEltTy;
5992  }
5993  }
5994  unsigned ResMultiplier =
5995  VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
5996  NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
5997  EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
5998 
5999  // If the source vector is too wide or too narrow, we may nevertheless be able
6000  // to construct a compatible shuffle either by concatenating it with UNDEF or
6001  // extracting a suitable range of elements.
6002  for (auto &Src : Sources) {
6003  EVT SrcVT = Src.ShuffleVec.getValueType();
6004 
6005  if (SrcVT.getSizeInBits() == VT.getSizeInBits())
6006  continue;
6007 
6008  // This stage of the search produces a source with the same element type as
6009  // the original, but with a total width matching the BUILD_VECTOR output.
6010  EVT EltVT = SrcVT.getVectorElementType();
6011  unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
6012  EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
6013 
6014  if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
6015  assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits());
6016  // We can pad out the smaller vector for free, so if it's part of a
6017  // shuffle...
6018  Src.ShuffleVec =
6019  DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
6020  DAG.getUNDEF(Src.ShuffleVec.getValueType()));
6021  continue;
6022  }
6023 
6024  assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
6025 
6026  if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
6027  LLVM_DEBUG(
6028  dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
6029  return SDValue();
6030  }
6031 
6032  if (Src.MinElt >= NumSrcElts) {
6033  // The extraction can just take the second half
6034  Src.ShuffleVec =
6035  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6036  DAG.getConstant(NumSrcElts, dl, MVT::i64));
6037  Src.WindowBase = -NumSrcElts;
6038  } else if (Src.MaxElt < NumSrcElts) {
6039  // The extraction can just take the first half
6040  Src.ShuffleVec =
6041  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6042  DAG.getConstant(0, dl, MVT::i64));
6043  } else {
6044  // An actual VEXT is needed
6045  SDValue VEXTSrc1 =
6046  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6047  DAG.getConstant(0, dl, MVT::i64));
6048  SDValue VEXTSrc2 =
6049  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6050  DAG.getConstant(NumSrcElts, dl, MVT::i64));
6051  unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
6052 
6053  Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
6054  VEXTSrc2,
6055  DAG.getConstant(Imm, dl, MVT::i32));
6056  Src.WindowBase = -Src.MinElt;
6057  }
6058  }
6059 
6060  // Another possible incompatibility occurs from the vector element types. We
6061  // can fix this by bitcasting the source vectors to the same type we intend
6062  // for the shuffle.
6063  for (auto &Src : Sources) {
6064  EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
6065  if (SrcEltTy == SmallestEltTy)
6066  continue;
6067  assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
6068  Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
6069  Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
6070  Src.WindowBase *= Src.WindowScale;
6071  }
6072 
6073  // Final sanity check before we try to actually produce a shuffle.
6074  LLVM_DEBUG(for (auto Src
6075  : Sources)
6076  assert(Src.ShuffleVec.getValueType() == ShuffleVT););
6077 
6078  // The stars all align, our next step is to produce the mask for the shuffle.
6079  SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
6080  int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
6081  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
6082  SDValue Entry = Op.getOperand(i);
6083  if (Entry.isUndef())
6084  continue;
6085 
6086  auto Src = find(Sources, Entry.getOperand(0));
6087  int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
6088 
6089  // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
6090  // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
6091  // segment.
6092  EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
6093  int BitsDefined =
6094  std::min(OrigEltTy.getSizeInBits(), VT.getScalarSizeInBits());
6095  int LanesDefined = BitsDefined / BitsPerShuffleLane;
6096 
6097  // This source is expected to fill ResMultiplier lanes of the final shuffle,
6098  // starting at the appropriate offset.
6099  int *LaneMask = &Mask[i * ResMultiplier];
6100 
6101  int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
6102  ExtractBase += NumElts * (Src - Sources.begin());
6103  for (int j = 0; j < LanesDefined; ++j)
6104  LaneMask[j] = ExtractBase + j;
6105  }
6106 
6107  // Final check before we try to produce nonsense...
6108  if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
6109  LLVM_DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
6110  return SDValue();
6111  }
6112 
6113  SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
6114  for (unsigned i = 0; i < Sources.size(); ++i)
6115  ShuffleOps[i] = Sources[i].ShuffleVec;
6116 
6117  SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
6118  ShuffleOps[1], Mask);
6119  SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
6120 
6121  LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
6122  dbgs() << "Reshuffle, creating node: "; V.dump(););
6123 
6124  return V;
6125 }
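
// As a concrete (hypothetical) example of what the search above recognizes:
// a v4i16 BUILD_VECTOR of <A[1], A[2], B[0], B[3]>, where A and B are v4i16
// and every extract index is a constant, becomes
//   vector_shuffle<1, 2, 4, 7> A, B
// since lanes taken from the second source are offset by NumElts (= 4).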
6126 
6127 // check if an EXT instruction can handle the shuffle mask when the
6128 // vector sources of the shuffle are the same.
6129 static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
6130  unsigned NumElts = VT.getVectorNumElements();
6131 
6132  // Assume that the first shuffle index is not UNDEF. Fail if it is.
6133  if (M[0] < 0)
6134  return false;
6135 
6136  Imm = M[0];
6137 
6138  // If this is a VEXT shuffle, the immediate value is the index of the first
6139  // element. The other shuffle indices must be the successive elements after
6140  // the first one.
6141  unsigned ExpectedElt = Imm;
6142  for (unsigned i = 1; i < NumElts; ++i) {
6143  // Increment the expected index. If it wraps around, just follow it
6144  // back to index zero and keep going.
6145  ++ExpectedElt;
6146  if (ExpectedElt == NumElts)
6147  ExpectedElt = 0;
6148 
6149  if (M[i] < 0)
6150  continue; // ignore UNDEF indices
6151  if (ExpectedElt != static_cast<unsigned>(M[i]))
6152  return false;
6153  }
6154 
6155  return true;
6156 }
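
// Example: for v8i8 and the single-source mask <3, 4, 5, 6, 7, 0, 1, 2> the
// indices are consecutive modulo NumElts, so this matches with Imm = 3,
// i.e. roughly "ext v0.8b, v1.8b, v1.8b, #3" once scaled by getExtFactor.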
6157 
6158 // check if an EXT instruction can handle the shuffle mask when the
6159 // vector sources of the shuffle are different.
6160 static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
6161  unsigned &Imm) {
6162  // Look for the first non-undef element.
6163  const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
6164 
6165  // Benefit from APInt to handle overflow when calculating the expected element.
6166  unsigned NumElts = VT.getVectorNumElements();
6167  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
6168  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
6169  // The following shuffle indices must be the successive elements after the
6170  // first real element.
6171  const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
6172  [&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
6173  if (FirstWrongElt != M.end())
6174  return false;
6175 
6176  // The index of an EXT is the first element if it is not UNDEF.
6177  // Watch out for the beginning UNDEFs. The EXT index should be the expected
6178  // value of the first element. E.g.
6179  // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
6180  // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
6181  // ExpectedElt is the last mask index plus 1.
6182  Imm = ExpectedElt.getZExtValue();
6183 
6184  // There are two different cases that require reversing the input vectors.
6185  // For example, for vector <4 x i32> we have the following cases,
6186  // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
6187  // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
6188  // For both cases, we finally use mask <5, 6, 7, 0>, which requires
6189  // to reverse two input vectors.
6190  if (Imm < NumElts)
6191  ReverseEXT = true;
6192  else
6193  Imm -= NumElts;
6194 
6195  return true;
6196 }
6197 
6198 /// isREVMask - Check if a vector shuffle corresponds to a REV
6199 /// instruction with the specified blocksize. (The order of the elements
6200 /// within each block of the vector is reversed.)
6201 static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
6202  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
6203  "Only possible block sizes for REV are: 16, 32, 64");
6204 
6205  unsigned EltSz = VT.getScalarSizeInBits();
6206  if (EltSz == 64)
6207  return false;
6208 
6209  unsigned NumElts = VT.getVectorNumElements();
6210  unsigned BlockElts = M[0] + 1;
6211  // If the first shuffle index is UNDEF, be optimistic.
6212  if (M[0] < 0)
6213  BlockElts = BlockSize / EltSz;
6214 
6215  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
6216  return false;
6217 
6218  for (unsigned i = 0; i < NumElts; ++i) {
6219  if (M[i] < 0)
6220  continue; // ignore UNDEF indices
6221  if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
6222  return false;
6223  }
6224 
6225  return true;
6226 }
6227 
6228 static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6229  unsigned NumElts = VT.getVectorNumElements();
6230  WhichResult = (M[0] == 0 ? 0 : 1);
6231  unsigned Idx = WhichResult * NumElts / 2;
6232  for (unsigned i = 0; i != NumElts; i += 2) {
6233  if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
6234  (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
6235  return false;
6236  Idx += 1;
6237  }
6238 
6239  return true;
6240 }
6241 
6242 static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6243  unsigned NumElts = VT.getVectorNumElements();
6244  WhichResult = (M[0] == 0 ? 0 : 1);
6245  for (unsigned i = 0; i != NumElts; ++i) {
6246  if (M[i] < 0)
6247  continue; // ignore UNDEF indices
6248  if ((unsigned)M[i] != 2 * i + WhichResult)
6249  return false;
6250  }
6251 
6252  return true;
6253 }
6254 
6255 static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6256  unsigned NumElts = VT.getVectorNumElements();
6257  WhichResult = (M[0] == 0 ? 0 : 1);
6258  for (unsigned i = 0; i < NumElts; i += 2) {
6259  if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
6260  (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
6261  return false;
6262  }
6263  return true;
6264 }
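
// For a v4i32 shuffle, the masks these three predicates accept look like:
//   ZIP1 <0, 4, 1, 5>   ZIP2 <2, 6, 3, 7>   (WhichResult = 0 / 1)
//   UZP1 <0, 2, 4, 6>   UZP2 <1, 3, 5, 7>
//   TRN1 <0, 4, 2, 6>   TRN2 <1, 5, 3, 7>
// with -1 (undef) allowed in any position.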
6265 
6266 /// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
6267 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
6268 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
6269 static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6270  unsigned NumElts = VT.getVectorNumElements();
6271  WhichResult = (M[0] == 0 ? 0 : 1);
6272  unsigned Idx = WhichResult * NumElts / 2;
6273  for (unsigned i = 0; i != NumElts; i += 2) {
6274  if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
6275  (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
6276  return false;
6277  Idx += 1;
6278  }
6279 
6280  return true;
6281 }
6282 
6283 /// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
6284 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
6285 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
6286 static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6287  unsigned Half = VT.getVectorNumElements() / 2;
6288  WhichResult = (M[0] == 0 ? 0 : 1);
6289  for (unsigned j = 0; j != 2; ++j) {
6290  unsigned Idx = WhichResult;
6291  for (unsigned i = 0; i != Half; ++i) {
6292  int MIdx = M[i + j * Half];
6293  if (MIdx >= 0 && (unsigned)MIdx != Idx)
6294  return false;
6295  Idx += 2;
6296  }
6297  }
6298 
6299  return true;
6300 }
6301 
6302 /// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
6303 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
6304 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
6305 static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6306  unsigned NumElts = VT.getVectorNumElements();
6307  WhichResult = (M[0] == 0 ? 0 : 1);
6308  for (unsigned i = 0; i < NumElts; i += 2) {
6309  if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
6310  (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
6311  return false;
6312  }
6313  return true;
6314 }
6315 
6316 static bool isINSMask(ArrayRef<int> M, int NumInputElements,
6317  bool &DstIsLeft, int &Anomaly) {
6318  if (M.size() != static_cast<size_t>(NumInputElements))
6319  return false;
6320 
6321  int NumLHSMatch = 0, NumRHSMatch = 0;
6322  int LastLHSMismatch = -1, LastRHSMismatch = -1;
6323 
6324  for (int i = 0; i < NumInputElements; ++i) {
6325  if (M[i] == -1) {
6326  ++NumLHSMatch;
6327  ++NumRHSMatch;
6328  continue;
6329  }
6330 
6331  if (M[i] == i)
6332  ++NumLHSMatch;
6333  else
6334  LastLHSMismatch = i;
6335 
6336  if (M[i] == i + NumInputElements)
6337  ++NumRHSMatch;
6338  else
6339  LastRHSMismatch = i;
6340  }
6341 
6342  if (NumLHSMatch == NumInputElements - 1) {
6343  DstIsLeft = true;
6344  Anomaly = LastLHSMismatch;
6345  return true;
6346  } else if (NumRHSMatch == NumInputElements - 1) {
6347  DstIsLeft = false;
6348  Anomaly = LastRHSMismatch;
6349  return true;
6350  }
6351 
6352  return false;
6353 }
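
// Example: for two v4 inputs, the mask <0, 1, 6, 3> matches the left-hand
// vector everywhere except lane 2, and that lane reads element 6 - 4 = 2 of
// the right-hand vector, so DstIsLeft = true and Anomaly = 2: an INS of RHS
// lane 2 into LHS lane 2.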
6354 
6355 static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
6356  if (VT.getSizeInBits() != 128)
6357  return false;
6358 
6359  unsigned NumElts = VT.getVectorNumElements();
6360 
6361  for (int I = 0, E = NumElts / 2; I != E; I++) {
6362  if (Mask[I] != I)
6363  return false;
6364  }
6365 
6366  int Offset = NumElts / 2;
6367  for (int I = NumElts / 2, E = NumElts; I != E; I++) {
6368  if (Mask[I] != I + SplitLHS * Offset)
6369  return false;
6370  }
6371 
6372  return true;
6373 }
6374 
6375 static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
6376  SDLoc DL(Op);
6377  EVT VT = Op.getValueType();
6378  SDValue V0 = Op.getOperand(0);
6379  SDValue V1 = Op.getOperand(1);
6380  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
6381 
6382  if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
6383  VT.getVectorElementType() != V1.getValueType().getVectorElementType())
6384  return SDValue();
6385 
6386  bool SplitV0 = V0.getValueSizeInBits() == 128;
6387 
6388  if (!isConcatMask(Mask, VT, SplitV0))
6389  return SDValue();
6390 
6391  EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
6392  VT.getVectorNumElements() / 2);
6393  if (SplitV0) {
6394  V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
6395  DAG.getConstant(0, DL, MVT::i64));
6396  }
6397  if (V1.getValueSizeInBits() == 128) {
6398  V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
6399  DAG.getConstant(0, DL, MVT::i64));
6400  }
6401  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
6402 }
6403 
6404 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
6405 /// the specified operations to build the shuffle.
6406 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
6407  SDValue RHS, SelectionDAG &DAG,
6408  const SDLoc &dl) {
6409  unsigned OpNum = (PFEntry >> 26) & 0x0F;
6410  unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
6411  unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
6412 
6413  enum {
6414  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
6415  OP_VREV,
6416  OP_VDUP0,
6417  OP_VDUP1,
6418  OP_VDUP2,
6419  OP_VDUP3,
6420  OP_VEXT1,
6421  OP_VEXT2,
6422  OP_VEXT3,
6423  OP_VUZPL, // VUZP, left result
6424  OP_VUZPR, // VUZP, right result
6425  OP_VZIPL, // VZIP, left result
6426  OP_VZIPR, // VZIP, right result
6427  OP_VTRNL, // VTRN, left result
6428  OP_VTRNR // VTRN, right result
6429  };
6430 
6431  if (OpNum == OP_COPY) {
6432  if (LHSID == (1 * 9 + 2) * 9 + 3)
6433  return LHS;
6434  assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
6435  return RHS;
6436  }
6437 
6438  SDValue OpLHS, OpRHS;
6439  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
6440  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
6441  EVT VT = OpLHS.getValueType();
6442 
6443  switch (OpNum) {
6444  default:
6445  llvm_unreachable("Unknown shuffle opcode!");
6446  case OP_VREV:
6447  // VREV divides the vector in half and swaps within the half.
6448  if (VT.getVectorElementType() == MVT::i32 ||
6449  VT.getVectorElementType() == MVT::f32)
6450  return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
6451  // vrev <4 x i16> -> REV32
6452  if (VT.getVectorElementType() == MVT::i16 ||
6453  VT.getVectorElementType() == MVT::f16)
6454  return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
6455  // vrev <4 x i8> -> REV16
6456  assert(VT.getVectorElementType() == MVT::i8);
6457  return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
6458  case OP_VDUP0:
6459  case OP_VDUP1:
6460  case OP_VDUP2:
6461  case OP_VDUP3: {
6462  EVT EltTy = VT.getVectorElementType();
6463  unsigned Opcode;
6464  if (EltTy == MVT::i8)
6465  Opcode = AArch64ISD::DUPLANE8;
6466  else if (EltTy == MVT::i16 || EltTy == MVT::f16)
6467  Opcode = AArch64ISD::DUPLANE16;
6468  else if (EltTy == MVT::i32 || EltTy == MVT::f32)
6469  Opcode = AArch64ISD::DUPLANE32;
6470  else if (EltTy == MVT::i64 || EltTy == MVT::f64)
6471  Opcode = AArch64ISD::DUPLANE64;
6472  else
6473  llvm_unreachable("Invalid vector element type?");
6474 
6475  if (VT.getSizeInBits() == 64)
6476  OpLHS = WidenVector(OpLHS, DAG);
6477  SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
6478  return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
6479  }
6480  case OP_VEXT1:
6481  case OP_VEXT2:
6482  case OP_VEXT3: {
6483  unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
6484  return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
6485  DAG.getConstant(Imm, dl, MVT::i32));
6486  }
6487  case OP_VUZPL:
6488  return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
6489  OpRHS);
6490  case OP_VUZPR:
6491  return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
6492  OpRHS);
6493  case OP_VZIPL:
6494  return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
6495  OpRHS);
6496  case OP_VZIPR:
6497  return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
6498  OpRHS);
6499  case OP_VTRNL:
6500  return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
6501  OpRHS);
6502  case OP_VTRNR:
6503  return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
6504  OpRHS);
6505  }
6506 }
6507 
6508 static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
6509  SelectionDAG &DAG) {
6510  // Check to see if we can use the TBL instruction.
6511  SDValue V1 = Op.getOperand(0);
6512  SDValue V2 = Op.getOperand(1);
6513  SDLoc DL(Op);
6514 
6515  EVT EltVT = Op.getValueType().getVectorElementType();
6516  unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
6517 
6518  SmallVector<SDValue, 8> TBLMask;
6519  for (int Val : ShuffleMask) {
6520  for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
6521  unsigned Offset = Byte + Val * BytesPerElt;
6522  TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
6523  }
6524  }
6525 
6526  MVT IndexVT = MVT::v8i8;
6527  unsigned IndexLen = 8;
6528  if (Op.getValueSizeInBits() == 128) {
6529  IndexVT = MVT::v16i8;
6530  IndexLen = 16;
6531  }
6532 
6533  SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
6534  SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
6535 
6536  SDValue Shuffle;
6537  if (V2.getNode()->isUndef()) {
6538  if (IndexLen == 8)
6539  V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
6540  Shuffle = DAG.getNode(
6541  ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
6542  DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
6543  DAG.getBuildVector(IndexVT, DL,
6544  makeArrayRef(TBLMask.data(), IndexLen)));
6545  } else {
6546  if (IndexLen == 8) {
6547  V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
6548  Shuffle = DAG.getNode(
6549  ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
6550  DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
6551  DAG.getBuildVector(IndexVT, DL,
6552  makeArrayRef(TBLMask.data(), IndexLen)));
6553  } else {
6554  // FIXME: We cannot, for the moment, emit a TBL2 instruction because we
6555  // cannot currently represent the register constraints on the input
6556  // table registers.
6557  // Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
6558  // DAG.getBuildVector(IndexVT, DL, &TBLMask[0],
6559  // IndexLen));
6560  Shuffle = DAG.getNode(
6561  ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
6562  DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
6563  V2Cst, DAG.getBuildVector(IndexVT, DL,
6564  makeArrayRef(TBLMask.data(), IndexLen)));
6565  }
6566  }
6567  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
6568 }
6569 
6570 static unsigned getDUPLANEOp(EVT EltType) {
6571  if (EltType == MVT::i8)
6572  return AArch64ISD::DUPLANE8;
6573  if (EltType == MVT::i16 || EltType == MVT::f16)
6574  return AArch64ISD::DUPLANE16;
6575  if (EltType == MVT::i32 || EltType == MVT::f32)
6576  return AArch64ISD::DUPLANE32;
6577  if (EltType == MVT::i64 || EltType == MVT::f64)
6578  return AArch64ISD::DUPLANE64;
6579 
6580  llvm_unreachable("Invalid vector element type?");
6581 }
6582 
6583 SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
6584  SelectionDAG &DAG) const {
6585  SDLoc dl(Op);
6586  EVT VT = Op.getValueType();
6587 
6588  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
6589 
6590  // Convert shuffles that are directly supported on NEON to target-specific
6591  // DAG nodes, instead of keeping them as shuffles and matching them again
6592  // during code selection. This is more efficient and avoids the possibility
6593  // of inconsistencies between legalization and selection.
6594  ArrayRef<int> ShuffleMask = SVN->getMask();
6595 
6596  SDValue V1 = Op.getOperand(0);
6597  SDValue V2 = Op.getOperand(1);
6598 
6599  if (SVN->isSplat()) {
6600  int Lane = SVN->getSplatIndex();
6601  // If this is undef splat, generate it via "just" vdup, if possible.
6602  if (Lane == -1)
6603  Lane = 0;
6604 
6605  if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
6606  return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
6607  V1.getOperand(0));
6608  // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
6609  // constant. If so, we can just reference the lane's definition directly.
6610  if (V1.getOpcode() == ISD::BUILD_VECTOR &&
6611  !isa<ConstantSDNode>(V1.getOperand(Lane)))
6612  return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
6613 
6614  // Otherwise, duplicate from the lane of the input vector.
6615  unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
6616 
6617  // SelectionDAGBuilder may have "helpfully" already extracted or concatenated
6618  // to make a vector of the same size as this SHUFFLE. We can ignore the
6619  // extract entirely, and canonicalise the concat using WidenVector.
6620  if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
6621  Lane += cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
6622  V1 = V1.getOperand(0);
6623  } else if (V1.getOpcode() == ISD::CONCAT_VECTORS) {
6624  unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
6625  Lane -= Idx * VT.getVectorNumElements() / 2;
6626  V1 = WidenVector(V1.getOperand(Idx), DAG);
6627  } else if (VT.getSizeInBits() == 64)
6628  V1 = WidenVector(V1, DAG);
6629 
6630  return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, dl, MVT::i64));
6631  }
6632 
6633  if (isREVMask(ShuffleMask, VT, 64))
6634  return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
6635  if (isREVMask(ShuffleMask, VT, 32))
6636  return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
6637  if (isREVMask(ShuffleMask, VT, 16))
6638  return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
6639 
6640  bool ReverseEXT = false;
6641  unsigned Imm;
6642  if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
6643  if (ReverseEXT)
6644  std::swap(V1, V2);
6645  Imm *= getExtFactor(V1);
6646  return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
6647  DAG.getConstant(Imm, dl, MVT::i32));
6648  } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
6649  Imm *= getExtFactor(V1);
6650  return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
6651  DAG.getConstant(Imm, dl, MVT::i32));
6652  }
6653 
6654  unsigned WhichResult;
6655  if (isZIPMask(ShuffleMask, VT, WhichResult)) {
6656  unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
6657  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
6658  }
6659  if (isUZPMask(ShuffleMask, VT, WhichResult)) {
6660  unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
6661  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
6662  }
6663  if (isTRNMask(ShuffleMask, VT, WhichResult)) {
6664  unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
6665  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
6666  }
6667 
6668  if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
6669  unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
6670  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
6671  }
6672  if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
6673  unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
6674  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
6675  }
6676  if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
6677  unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
6678  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
6679  }
6680 
6681  if (SDValue Concat = tryFormConcatFromShuffle(Op, DAG))
6682  return Concat;
6683 
6684  bool DstIsLeft;
6685  int Anomaly;
6686  int NumInputElements = V1.getValueType().getVectorNumElements();
6687  if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
6688  SDValue DstVec = DstIsLeft ? V1 : V2;
6689  SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
6690 
6691  SDValue SrcVec = V1;
6692  int SrcLane = ShuffleMask[Anomaly];
6693  if (SrcLane >= NumInputElements) {
6694  SrcVec = V2;
6695  SrcLane -= VT.getVectorNumElements();
6696  }
6697  SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
6698 
6699  EVT ScalarVT = VT.getVectorElementType();
6700 
6701  if (ScalarVT.getSizeInBits() < 32 && ScalarVT.isInteger())
6702  ScalarVT = MVT::i32;
6703 
6704  return DAG.getNode(
6705  ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
6706  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
6707  DstLaneV);
6708  }
6709 
6710  // If the shuffle is not directly supported and it has 4 elements, use
6711  // the PerfectShuffle-generated table to synthesize it from other shuffles.
6712  unsigned NumElts = VT.getVectorNumElements();
6713  if (NumElts == 4) {
6714  unsigned PFIndexes[4];
6715  for (unsigned i = 0; i != 4; ++i) {
6716  if (ShuffleMask[i] < 0)
6717  PFIndexes[i] = 8;
6718  else
6719  PFIndexes[i] = ShuffleMask[i];
6720  }
6721 
6722  // Compute the index in the perfect shuffle table.
6723  unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
6724  PFIndexes[2] * 9 + PFIndexes[3];
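  // The table is indexed in base 9 (lane values 0-7, plus 8 for an undef/"any"
  // lane). For example, mask <4, 5, 0, 1> gives 4*729 + 5*81 + 0*9 + 1 = 3322.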
6725  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6726  unsigned Cost = (PFEntry >> 30);
6727 
6728  if (Cost <= 4)
6729  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
6730  }
6731 
6732  return GenerateTBL(Op, ShuffleMask, DAG);
6733 }
6734 
6735 static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
6736  APInt &UndefBits) {
6737  EVT VT = BVN->getValueType(0);
6738  APInt SplatBits, SplatUndef;
6739  unsigned SplatBitSize;
6740  bool HasAnyUndefs;
6741  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
6742  unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
6743 
6744  for (unsigned i = 0; i < NumSplats; ++i) {
6745  CnstBits <<= SplatBitSize;
6746  UndefBits <<= SplatBitSize;
6747  CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
6748  UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
6749  }
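  // At this point CnstBits holds the splat pattern repeated across the full
  // vector width. For example, a v4i32 splat of 0x000000FF (SplatBitSize 32)
  // is repeated four times, giving the 128-bit value
  // 0x000000FF000000FF000000FF000000FF.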
6750 
6751  return true;
6752  }
6753 
6754  return false;
6755 }
6756 
6757 // Try 64-bit splatted SIMD immediate.
6758 static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
6759  const APInt &Bits) {
6760  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
6761  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
6762  EVT VT = Op.getValueType();
6763  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::f64;
6764 
6765  if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
6766  Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
6767 
6768  SDLoc dl(Op);
6769  SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
6770  DAG.getConstant(Value, dl, MVT::i32));
6771  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6772  }
6773  }
6774 
6775  return SDValue();
6776 }
6777 
6778 // Try 32-bit splatted SIMD immediate.
6779 static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
6780  const APInt &Bits,
6781  const SDValue *LHS = nullptr) {
6782  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
6783  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
6784  EVT VT = Op.getValueType();
6785  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6786  bool isAdvSIMDModImm = false;
6787  uint64_t Shift;
6788 
6789  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) {
6790  Value = AArch64_AM::encodeAdvSIMDModImmType1(Value);
6791  Shift = 0;
6792  }
6793  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) {
6794  Value = AArch64_AM::encodeAdvSIMDModImmType2(Value);
6795  Shift = 8;
6796  }
6797  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) {
6798  Value = AArch64_AM::encodeAdvSIMDModImmType3(Value);
6799  Shift = 16;
6800  }
6801  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) {
6802  Value = AArch64_AM::encodeAdvSIMDModImmType4(Value);
6803  Shift = 24;
6804  }
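  // For example, a 32-bit splat of 0x00AB0000 matches type 3 above, giving
  // Value == 0xAB and Shift == 16, i.e. roughly "movi Vd.4s, #0xab, lsl #16"
  // (or the corresponding bic/orr form when LHS is provided).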
6805 
6806  if (isAdvSIMDModImm) {
6807  SDLoc dl(Op);
6808  SDValue Mov;
6809 
6810  if (LHS)
6811  Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
6812  DAG.getConstant(Value, dl, MVT::i32),
6813  DAG.getConstant(Shift, dl, MVT::i32));
6814  else
6815  Mov = DAG.getNode(NewOp, dl, MovTy,
6816  DAG.getConstant(Value, dl, MVT::i32),
6817  DAG.getConstant(Shift, dl, MVT::i32));
6818 
6819  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6820  }
6821  }
6822 
6823  return SDValue();
6824 }
6825 
6826 // Try 16-bit splatted SIMD immediate.
6827 static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
6828  const APInt &Bits,
6829  const SDValue *LHS = nullptr) {
6830  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
6831  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
6832  EVT VT = Op.getValueType();
6833  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6834  bool isAdvSIMDModImm = false;
6835  uint64_t Shift;
6836 
6837  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) {
6838  Value = AArch64_AM::encodeAdvSIMDModImmType5(Value);
6839  Shift = 0;
6840  }
6841  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) {
6842  Value = AArch64_AM::encodeAdvSIMDModImmType6(Value);
6843  Shift = 8;
6844  }
6845 
6846  if (isAdvSIMDModImm) {
6847  SDLoc dl(Op);
6848  SDValue Mov;
6849 
6850  if (LHS)
6851  Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
6852  DAG.getConstant(Value, dl, MVT::i32),
6853  DAG.getConstant(Shift, dl, MVT::i32));
6854  else
6855  Mov = DAG.getNode(NewOp, dl, MovTy,
6856  DAG.getConstant(Value, dl, MVT::i32),
6857  DAG.getConstant(Shift, dl, MVT::i32));
6858 
6859  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6860  }
6861  }
6862 
6863  return SDValue();
6864 }
6865 
6866 // Try 32-bit splatted SIMD immediate with shifted ones.
6867 static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
6868  SelectionDAG &DAG, const APInt &Bits) {
6869  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
6870  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
6871  EVT VT = Op.getValueType();
6872  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6873  bool isAdvSIMDModImm = false;
6874  uint64_t Shift;
6875 
6876  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) {
6877  Value = AArch64_AM::encodeAdvSIMDModImmType7(Value);
6878  Shift = 264;
6879  }
6880  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
6881  Value = AArch64_AM::encodeAdvSIMDModImmType8(Value);
6882  Shift = 272;
6883  }
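  // The Shift values 264 and 272 correspond to the shifter-operand encodings
  // for MSL #8 and MSL #16, i.e. the shifted-ones MOVI/MVNI forms.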
6884 
6885  if (isAdvSIMDModImm) {
6886  SDLoc dl(Op);
6887  SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
6888  DAG.getConstant(Value, dl, MVT::i32),
6889  DAG.getConstant(Shift, dl, MVT::i32));
6890  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6891  }
6892  }
6893 
6894  return SDValue();
6895 }
6896 
6897 // Try 8-bit splatted SIMD immediate.
6898 static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
6899  const APInt &Bits) {
6900  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
6901  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
6902  EVT VT = Op.getValueType();
6903  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
6904 
6905  if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
6906  Value = AArch64_AM::encodeAdvSIMDModImmType9(Value);
6907 
6908  SDLoc dl(Op);
6909  SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
6910  DAG.getConstant(Value, dl, MVT::i32));
6911  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6912  }
6913  }
6914 
6915  return SDValue();
6916 }
6917 
6918 // Try FP splatted SIMD immediate.
6919 static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
6920  const APInt &Bits) {
6921  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
6922  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
6923  EVT VT = Op.getValueType();
6924  bool isWide = (VT.getSizeInBits() == 128);
6925  MVT MovTy;
6926  bool isAdvSIMDModImm = false;
6927 
6928  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) {
6929  Value = AArch64_AM::encodeAdvSIMDModImmType11(Value);
6930  MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
6931  }
6932  else if (isWide &&
6933  (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
6934  Value = AArch64_AM::encodeAdvSIMDModImmType12(Value);
6935  MovTy = MVT::v2f64;
6936  }
6937 
6938  if (isAdvSIMDModImm) {
6939  SDLoc dl(Op);
6940  SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
6941  DAG.getConstant(Value, dl, MVT::i32));
6942  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6943  }
6944  }
6945 
6946  return SDValue();
6947 }
6948 
6949 SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
6950  SelectionDAG &DAG) const {
6951  SDValue LHS = Op.getOperand(0);
6952  EVT VT = Op.getValueType();
6953 
6954  BuildVectorSDNode *BVN =
6955  dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
6956  if (!BVN) {
6957  // AND commutes, so try swapping the operands.
6958  LHS = Op.getOperand(1);
6959  BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
6960  }
6961  if (!BVN)
6962  return Op;
6963 
6964  APInt DefBits(VT.getSizeInBits(), 0);
6965  APInt UndefBits(VT.getSizeInBits(), 0);
6966  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
6967  SDValue NewOp;
6968 
6969  // We only have BIC vector immediate instruction, which is and-not.
6970  DefBits = ~DefBits;
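  // For example, (and X, splat(0xffffff00)) inverts to 0x000000ff, which is
  // encodable, so this lowers to a BIC with immediate #0xff (clearing the low
  // byte of each 32-bit lane).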
6971  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, Op, DAG,
6972  DefBits, &LHS)) ||
6973  (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, Op, DAG,
6974  DefBits, &LHS)))
6975  return NewOp;
6976 
6977  UndefBits = ~UndefBits;
6978  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, Op, DAG,
6979  UndefBits, &LHS)) ||
6980  (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, Op, DAG,
6981  UndefBits, &LHS)))
6982  return NewOp;
6983  }
6984 
6985  // We can always fall back to a non-immediate AND.
6986  return Op;
6987 }
6988 
6989 // Specialized code to quickly find if PotentialBVec is a BuildVector that
6990 // consists of only the same constant int value, returned in reference arg
6991 // ConstVal
6992 static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
6993  uint64_t &ConstVal) {
6994  BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
6995  if (!Bvec)
6996  return false;
6997  ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
6998  if (!FirstElt)
6999  return false;
7000  EVT VT = Bvec->getValueType(0);
7001  unsigned NumElts = VT.getVectorNumElements();
7002  for (unsigned i = 1; i < NumElts; ++i)
7003  if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
7004  return false;
7005  ConstVal = FirstElt->getZExtValue();
7006  return true;
7007 }
7008 
7009 static unsigned getIntrinsicID(const SDNode *N) {
7010  unsigned Opcode = N->getOpcode();
7011  switch (Opcode) {
7012  default:
7013  return Intrinsic::not_intrinsic;
7014  case ISD::INTRINSIC_WO_CHAIN: {
7015  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
7016  if (IID < Intrinsic::num_intrinsics)
7017  return IID;
7018  return Intrinsic::not_intrinsic;
7019  }
7020  }
7021 }
7022 
7023 // Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
7024 // to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
7025 // BUILD_VECTORs with constant element C1, C2 is a constant, and C1 == ~C2.
7026 // Also, logical shift right -> sri, with the same structure.
7027 static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
7028  EVT VT = N->getValueType(0);
7029 
7030  if (!VT.isVector())
7031  return SDValue();
7032 
7033  SDLoc DL(N);
7034 
7035  // Is the first op an AND?
7036  const SDValue And = N->getOperand(0);
7037  if (And.getOpcode() != ISD::AND)
7038  return SDValue();
7039 
7040  // Is the second op an shl or lshr?
7041  SDValue Shift = N->getOperand(1);
7042  // This will have been turned into: AArch64ISD::VSHL vector, #shift
7043  // or AArch64ISD::VLSHR vector, #shift
7044  unsigned ShiftOpc = Shift.getOpcode();
7045  if ((ShiftOpc != AArch64ISD::VSHL && ShiftOpc != AArch64ISD::VLSHR))
7046  return SDValue();
7047  bool IsShiftRight = ShiftOpc == AArch64ISD::VLSHR;
7048 
7049  // Is the shift amount constant?
7050  ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
7051  if (!C2node)
7052  return SDValue();
7053 
7054  // Is the and mask vector all constant?
7055  uint64_t C1;
7056  if (!isAllConstantBuildVector(And.getOperand(1), C1))
7057  return SDValue();
7058 
7059  // Is C1 == ~C2, taking into account how much one can shift elements of a
7060  // particular size?
7061  uint64_t C2 = C2node->getZExtValue();
7062  unsigned ElemSizeInBits = VT.getScalarSizeInBits();
7063  if (C2 > ElemSizeInBits)
7064  return SDValue();
7065  unsigned ElemMask = (1 << ElemSizeInBits) - 1;
7066  if ((C1 & ElemMask) != (~C2 & ElemMask))
7067  return SDValue();
7068 
7069  SDValue X = And.getOperand(0);
7070  SDValue Y = Shift.getOperand(0);
7071 
7072  unsigned Intrin =
7073  IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli;
7074  SDValue ResultSLI =
7075  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
7076  DAG.getConstant(Intrin, DL, MVT::i32), X, Y,
7077  Shift.getOperand(1));
7078 
7079  LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
7080  LLVM_DEBUG(N->dump(&DAG));
7081  LLVM_DEBUG(dbgs() << "into: \n");
7082  LLVM_DEBUG(ResultSLI->dump(&DAG));
7083 
7084  ++NumShiftInserts;
7085  return ResultSLI;
7086 }
7087 
7088 SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
7089  SelectionDAG &DAG) const {
7090  // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
7091  if (EnableAArch64SlrGeneration) {
7092  if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
7093  return Res;
7094  }
7095 
7096  EVT VT = Op.getValueType();
7097 
7098  SDValue LHS = Op.getOperand(0);
7099  BuildVectorSDNode *BVN =
7100  dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
7101  if (!BVN) {
7102  // OR commutes, so try swapping the operands.
7103  LHS = Op.getOperand(1);
7104  BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
7105  }
7106  if (!BVN)
7107  return Op;
7108 
7109  APInt DefBits(VT.getSizeInBits(), 0);
7110  APInt UndefBits(VT.getSizeInBits(), 0);
7111  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
7112  SDValue NewOp;
7113 
7114  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
7115  DefBits, &LHS)) ||
7116  (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
7117  DefBits, &LHS)))
7118  return NewOp;
7119 
7120  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
7121  UndefBits, &LHS)) ||
7122  (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
7123  UndefBits, &LHS)))
7124  return NewOp;
7125  }
7126 
7127  // We can always fall back to a non-immediate OR.
7128  return Op;
7129 }
7130 
7131 // Normalize the operands of BUILD_VECTOR. The value of constant operands will
7132 // be truncated to fit element width.
7133 static SDValue NormalizeBuildVector(SDValue Op,
7134  SelectionDAG &DAG) {
7135  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7136  SDLoc dl(Op);
7137  EVT VT = Op.getValueType();
7138  EVT EltTy = VT.getVectorElementType();
7139 
7140  if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
7141  return Op;
7142 
7143  SmallVector<SDValue, 16> Ops;
7144  for (SDValue Lane : Op->ops()) {
7145  // For integer vectors, type legalization would have promoted the
7146  // operands already. Otherwise, if Op is a floating-point splat
7147  // (with operands cast to integers), then the only possibilities
7148  // are constants and UNDEFs.
7149  if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
7150  APInt LowBits(EltTy.getSizeInBits(),
7151  CstLane->getZExtValue());
7152  Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
7153  } else if (Lane.getNode()->isUndef()) {
7154  Lane = DAG.getUNDEF(MVT::i32);
7155  } else {
7156  assert(Lane.getValueType() == MVT::i32 &&
7157  "Unexpected BUILD_VECTOR operand type");
7158  }
7159  Ops.push_back(Lane);
7160  }
7161  return DAG.getBuildVector(VT, dl, Ops);
7162 }
7163 
7164 static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) {
7165  EVT VT = Op.getValueType();
7166 
7167  APInt DefBits(VT.getSizeInBits(), 0);
7168  APInt UndefBits(VT.getSizeInBits(), 0);
7169  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
7170  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
7171  SDValue NewOp;
7172  if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
7173  (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
7174  (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
7175  (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
7176  (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
7177  (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
7178  return NewOp;
7179 
7180  DefBits = ~DefBits;
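  // MVNI materializes the bitwise NOT of its immediate, so search for an
  // encodable ~DefBits instead. For example, a 32-bit splat of 0xFFFFFFFE
  // inverts to 0x00000001 and becomes "mvni Vd.4s, #0x01".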
7181  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
7182  (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
7183  (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
7184  return NewOp;
7185 
7186  DefBits = UndefBits;
7187  if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
7188  (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
7189  (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
7190  (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
7191  (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
7192  (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
7193  return NewOp;
7194 
7195  DefBits = ~UndefBits;
7196  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
7197  (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
7198  (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
7199  return NewOp;
7200  }
7201 
7202  return SDValue();
7203 }
7204 
7205 SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
7206  SelectionDAG &DAG) const {
7207  EVT VT = Op.getValueType();
7208 
7209  // Try to build a simple constant vector.
7210  Op = NormalizeBuildVector(Op, DAG);
7211  if (VT.isInteger()) {
7212  // Certain vector constants, used to express things like logical NOT and
7213  // arithmetic NEG, are passed through unmodified. This allows special
7214  // patterns for these operations to match, which will lower these constants
7215  // to whatever is proven necessary.
7216  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
7217  if (BVN->isConstant())
7218  if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
7219  unsigned BitSize = VT.getVectorElementType().getSizeInBits();
7220  APInt Val(BitSize,
7221  Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
7222  if (Val.isNullValue() || Val.isAllOnesValue())
7223  return Op;
7224  }
7225  }
7226 
7227  if (SDValue V = ConstantBuildVector(Op, DAG))
7228  return V;
7229 
7230  // Scan through the operands to find some interesting properties we can
7231  // exploit:
7232  // 1) If only one value is used, we can use a DUP, or
7233  // 2) if only the low element is not undef, we can just insert that, or
7234  // 3) if only one constant value is used (w/ some non-constant lanes),
7235  // we can splat the constant value into the whole vector then fill
7236  // in the non-constant lanes.
7237  // 4) FIXME: If different constant values are used, but we can intelligently
7238  // select the values we'll be overwriting for the non-constant
7239  // lanes such that we can directly materialize the vector
7240  // some other way (MOVI, e.g.), we can be sneaky.
7241  // 5) if all operands are EXTRACT_VECTOR_ELT, check for VUZP.
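  // For example, <i32 1, i32 1, i32 1, i32 %x> hits case 3: the constant 1 is
  // splatted into the whole vector first, and lane 3 is then overwritten with
  // %x via INSERT_VECTOR_ELT.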
7242  SDLoc dl(Op);
7243  unsigned NumElts = VT.getVectorNumElements();
7244  bool isOnlyLowElement = true;
7245  bool usesOnlyOneValue = true;
7246  bool usesOnlyOneConstantValue = true;
7247  bool isConstant = true;
7248  bool AllLanesExtractElt = true;
7249  unsigned NumConstantLanes = 0;
7250  SDValue Value;
7251  SDValue ConstantValue;
7252  for (unsigned i = 0; i < NumElts; ++i) {
7253  SDValue V = Op.getOperand(i);
7254  if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
7255  AllLanesExtractElt = false;
7256  if (V.isUndef())
7257  continue;
7258  if (i > 0)
7259  isOnlyLowElement = false;
7260  if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
7261  isConstant = false;
7262 
7263  if (isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V)) {
7264  ++NumConstantLanes;
7265  if (!ConstantValue.getNode())
7266  ConstantValue = V;
7267  else if (ConstantValue != V)
7268  usesOnlyOneConstantValue = false;
7269  }
7270 
7271  if (!Value.getNode())
7272  Value = V;
7273  else if (V != Value)
7274  usesOnlyOneValue = false;
7275  }
7276 
7277  if (!Value.getNode()) {
7278  LLVM_DEBUG(
7279  dbgs() << "LowerBUILD_VECTOR: value undefined, creating undef node\n");
7280  return DAG.getUNDEF(VT);
7281  }
7282 
7283  // Convert BUILD_VECTOR where all elements but the lowest are undef into
7284  // SCALAR_TO_VECTOR, except for when we have a single-element constant vector
7285  // as SimplifyDemandedBits will just turn that back into BUILD_VECTOR.
7286  if (isOnlyLowElement && !(NumElts == 1 && isa<ConstantSDNode>(Value))) {
7287  LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
7288  "SCALAR_TO_VECTOR node\n");
7289  return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
7290  }
7291 
7292  if (AllLanesExtractElt) {
7293  SDNode *Vector = nullptr;
7294  bool Even = false;
7295  bool Odd = false;
7296  // Check whether the extract elements match the Even pattern <0,2,4,...> or
7297  // the Odd pattern <1,3,5,...>.
7298  for (unsigned i = 0; i < NumElts; ++i) {
7299  SDValue V = Op.getOperand(i);
7300  const SDNode *N = V.getNode();
7301  if (!isa<ConstantSDNode>(N->getOperand(1)))
7302  break;
7303  SDValue N0 = N->getOperand(0);
7304 
7305  // All elements are extracted from the same vector.
7306  if (!Vector) {
7307  Vector = N0.getNode();
7308  // Check that the type of EXTRACT_VECTOR_ELT matches the type of
7309  // BUILD_VECTOR.
7310  if (VT.getVectorElementType() !=
7311  N0.getValueType().getVectorElementType())
7312  break;
7313  } else if (Vector != N0.getNode()) {
7314  Odd = false;
7315  Even = false;
7316  break;
7317  }
7318 
7319  // Extracted values are either at Even indices <0,2,4,...> or at Odd
7320  // indices <1,3,5,...>.
7321  uint64_t Val = N->getConstantOperandVal(1);
7322  if (Val == 2 * i) {
7323  Even = true;
7324  continue;
7325  }
7326  if (Val - 1 == 2 * i) {
7327  Odd = true;
7328  continue;
7329  }
7330 
7331  // Something does not match: abort.
7332  Odd = false;
7333  Even = false;
7334  break;
7335  }
7336  if (Even || Odd) {
7337  SDValue LHS =
7338  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
7339  DAG.getConstant(0, dl, MVT::i64));
7340  SDValue RHS =
7341  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
7342  DAG.getConstant(NumElts, dl, MVT::i64));
7343 
7344  if (Even && !Odd)
7345  return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), LHS,
7346  RHS);
7347  if (Odd && !Even)
7348  return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), LHS,
7349  RHS);
7350  }
7351  }
7352 
7353  // Use DUP for non-constant splats. For f32 constant splats, reduce to
7354  // i32 and try again.
7355  if (usesOnlyOneValue) {
7356  if (!isConstant) {
7357  if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7358  Value.getValueType() != VT) {
7359  LLVM_DEBUG(
7360  dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
7361  return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
7362  }
7363 
7364  // This is actually a DUPLANExx operation, which keeps everything in vector registers.
7365 
7366  SDValue Lane = Value.getOperand(1);
7367  Value = Value.getOperand(0);
7368  if (Value.getValueSizeInBits() == 64) {
7369  LLVM_DEBUG(
7370  dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
7371  "widening it\n");
7372  Value = WidenVector(Value, DAG);
7373  }
7374 
7375  unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
7376  return DAG.getNode(Opcode, dl, VT, Value, Lane);
7377  }
7378 
7379  if (VT.getVectorElementType().isFloatingPoint()) {
7380  SmallVector<SDValue, 8> Ops;
7381  EVT EltTy = VT.getVectorElementType();
7382  assert ((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) &&
7383  "Unsupported floating-point vector type");
7384  LLVM_DEBUG(
7385  dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
7386  "BITCASTS, and try again\n");
7387  MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
7388  for (unsigned i = 0; i < NumElts; ++i)
7389  Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
7390  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
7391  SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
7392  LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
7393  Val.dump(););
7394  Val = LowerBUILD_VECTOR(Val, DAG);
7395  if (Val.getNode())
7396  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
7397  }
7398  }
7399 
7400  // If there was only one constant value used and for more than one lane,
7401  // start by splatting that value, then replace the non-constant lanes. This
7402  // is better than the default, which will perform a separate initialization
7403  // for each lane.
7404  if (NumConstantLanes > 0 && usesOnlyOneConstantValue) {
7405  // Firstly, try to materialize the splat constant.
7406  SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
7407  Val = ConstantBuildVector(Vec, DAG);
7408  if (!Val) {
7409  // Otherwise, materialize the constant and splat it.
7410  Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
7411  DAG.ReplaceAllUsesWith(Vec.getNode(), &Val);
7412  }
7413 
7414  // Now insert the non-constant lanes.
7415  for (unsigned i = 0; i < NumElts; ++i) {
7416  SDValue V = Op.getOperand(i);
7417  SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
7418  if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V))
7419  // Note that type legalization likely mucked about with the VT of the
7420  // source operand, so we may have to convert it here before inserting.
7421  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
7422  }
7423  return Val;
7424  }
7425 
7426  // This will generate a load from the constant pool.
7427  if (isConstant) {
7428  LLVM_DEBUG(
7429  dbgs() << "LowerBUILD_VECTOR: all elements are constant, use default "
7430  "expansion\n");
7431  return SDValue();
7432  }
7433 
7434  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
7435  if (NumElts >= 4) {
7436  if (SDValue shuffle = ReconstructShuffle(Op, DAG))
7437  return shuffle;
7438  }
7439 
7440  // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
7441  // know the default expansion would otherwise fall back on something even
7442  // worse. For a vector with one or two non-undef values, that's
7443  // scalar_to_vector for the elements followed by a shuffle (provided the
7444  // shuffle is valid for the target) and materialization element by element
7445  // on the stack followed by a load for everything else.
7446  if (!isConstant && !usesOnlyOneValue) {
7447  LLVM_DEBUG(
7448  dbgs() << "LowerBUILD_VECTOR: alternatives failed, creating sequence "
7449  "of INSERT_VECTOR_ELT\n");
7450 
7451  SDValue Vec = DAG.getUNDEF(VT);
7452  SDValue Op0 = Op.getOperand(0);
7453  unsigned i = 0;
7454 
7455  // Use SCALAR_TO_VECTOR for lane zero to
7456  // a) Avoid a RMW dependency on the full vector register, and
7457  // b) Allow the register coalescer to fold away the copy if the
7458  // value is already in an S or D register, and we're forced to emit an
7459  // INSERT_SUBREG that we can't fold anywhere.
7460  //
7461  // We also allow types like i8 and i16 which are illegal scalar but legal
7462  // vector element types. After type-legalization the inserted value is
7463  // extended (i32) and it is safe to cast them to the vector type by ignoring
7464  // the upper bits of the lowest lane (e.g. v8i8, v4i16).
7465  if (!Op0.isUndef()) {
7466  LLVM_DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
7467  Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
7468  ++i;
7469  }
7470  LLVM_DEBUG(if (i < NumElts) dbgs()
7471  << "Creating nodes for the other vector elements:\n";);
7472  for (; i < NumElts; ++i) {
7473  SDValue V = Op.getOperand(i);
7474  if (V.isUndef())
7475  continue;
7476  SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
7477  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
7478  }
7479  return Vec;
7480  }
7481 
7482  LLVM_DEBUG(
7483  dbgs() << "LowerBUILD_VECTOR: use default expansion, failed to find "
7484  "better alternative\n");
7485  return SDValue();
7486 }
7487 
7488 SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
7489  SelectionDAG &DAG) const {
7490  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
7491 
7492  // Check for non-constant or out of range lane.
7493  EVT VT = Op.getOperand(0).getValueType();
7494  ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
7495  if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
7496  return SDValue();
7497 
7498 
7499  // Insertion/extraction are legal for V128 types.
7500  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
7501  VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
7502  VT == MVT::v8f16)
7503  return Op;
7504 
7505  if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
7506  VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
7507  return SDValue();
7508 
7509  // For V64 types, we perform insertion by expanding the value
7510  // to a V128 type and perform the insertion on that.
7511  SDLoc DL(Op);
7512  SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
7513  EVT WideTy = WideVec.getValueType();
7514 
7515  SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
7516  Op.getOperand(1), Op.getOperand(2));
7517  // Re-narrow the resultant vector.
7518  return NarrowVector(Node, DAG);
7519 }
7520 
7521 SDValue
7522 AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
7523  SelectionDAG &DAG) const {
7524  assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
7525 
7526  // Check for non-constant or out of range lane.
7527  EVT VT = Op.getOperand(0).getValueType();
7528  ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
7529  if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
7530  return SDValue();
7531 
7532 
7533  // Insertion/extraction are legal for V128 types.
7534  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
7535  VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
7536  VT == MVT::v8f16)
7537  return Op;
7538 
7539  if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
7540  VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
7541  return SDValue();
7542 
7543  // For V64 types, we perform extraction by expanding the value
7544  // to a V128 type and perform the extraction on that.
7545  SDLoc DL(Op);
7546  SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
7547  EVT WideTy = WideVec.getValueType();
7548 
7549  EVT ExtrTy = WideTy.getVectorElementType();
7550  if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
7551  ExtrTy = MVT::i32;
7552 
7553  // For extractions, we just return the result directly.
7554  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
7555  Op.getOperand(1));
7556 }
7557 
7558 SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
7559  SelectionDAG &DAG) const {
7560  EVT VT = Op.getOperand(0).getValueType();
7561  SDLoc dl(Op);
7562  // Just in case...
7563  if (!VT.isVector())
7564  return SDValue();
7565 
7567  if (!Cst)
7568  return SDValue();
7569  unsigned Val = Cst->getZExtValue();
7570 
7571  unsigned Size = Op.getValueSizeInBits();
7572 
7573  // This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
7574  if (Val == 0)
7575  return Op;
7576 
7577  // If this is extracting the upper 64-bits of a 128-bit vector, we match
7578  // that directly.
7579  if (Size == 64 && Val * VT.getScalarSizeInBits() == 64)
7580  return Op;
7581 
7582  return SDValue();
7583 }
7584 
7585 bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
7586  if (VT.getVectorNumElements() == 4 &&
7587  (VT.is128BitVector() || VT.is64BitVector())) {
7588  unsigned PFIndexes[4];
7589  for (unsigned i = 0; i != 4; ++i) {
7590  if (M[i] < 0)
7591  PFIndexes[i] = 8;
7592  else
7593  PFIndexes[i] = M[i];
7594  }
7595 
7596  // Compute the index in the perfect shuffle table.
7597  unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
7598  PFIndexes[2] * 9 + PFIndexes[3];
7599  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
7600  unsigned Cost = (PFEntry >> 30);
7601 
7602  if (Cost <= 4)
7603  return true;
7604  }
7605 
7606  bool DummyBool;
7607  int DummyInt;
7608  unsigned DummyUnsigned;
7609 
7610  return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
7611  isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
7612  isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
7613  // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
7614  isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
7615  isZIPMask(M, VT, DummyUnsigned) ||
7616  isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
7617  isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
7618  isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
7619  isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
7620  isConcatMask(M, VT, VT.getSizeInBits() == 128));
7621 }
7622 
7623 /// getVShiftImm - Check if this is a valid build_vector for the immediate
7624 /// operand of a vector shift operation, where all the elements of the
7625 /// build_vector must have the same constant integer value.
7626 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
7627  // Ignore bit_converts.
7628  while (Op.getOpcode() == ISD::BITCAST)
7629  Op = Op.getOperand(0);
7630  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
7631  APInt SplatBits, SplatUndef;
7632  unsigned SplatBitSize;
7633  bool HasAnyUndefs;
7634  if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
7635  HasAnyUndefs, ElementBits) ||
7636  SplatBitSize > ElementBits)
7637  return false;
7638  Cnt = SplatBits.getSExtValue();
7639  return true;
7640 }
7641 
7642 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
7643 /// operand of a vector shift left operation. That value must be in the range:
7644 /// 0 <= Value < ElementBits for a left shift; or
7645 /// 0 <= Value <= ElementBits for a long left shift.
7646 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
7647  assert(VT.isVector() && "vector shift count is not a vector type");
7648  int64_t ElementBits = VT.getScalarSizeInBits();
7649  if (!getVShiftImm(Op, ElementBits, Cnt))
7650  return false;
7651  return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
7652 }
7653 
7654 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
7655 /// operand of a vector shift right operation. The value must be in the range:
7656 /// 1 <= Value <= ElementBits for a right shift, or 1 <= Value <= ElementBits/2 for a narrow right shift.
7657 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
7658  assert(VT.isVector() && "vector shift count is not a vector type");
7659  int64_t ElementBits = VT.getScalarSizeInBits();
7660  if (!getVShiftImm(Op, ElementBits, Cnt))
7661  return false;
7662  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
7663 }
7664 
7665 SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
7666  SelectionDAG &DAG) const {
7667  EVT VT = Op.getValueType();
7668  SDLoc DL(Op);
7669  int64_t Cnt;
7670 
7671  if (!Op.getOperand(1).getValueType().isVector())
7672  return Op;
7673  unsigned EltSize = VT.getScalarSizeInBits();
7674 
7675  switch (Op.getOpcode()) {
7676  default:
7677  llvm_unreachable("unexpected shift opcode");
7678 
7679  case ISD::SHL:
7680  if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
7681  return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
7682  DAG.getConstant(Cnt, DL, MVT::i32));
7683  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
7684  DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
7685  MVT::i32),
7686  Op.getOperand(0), Op.getOperand(1));
7687  case ISD::SRA:
7688  case ISD::SRL:
7689  // Right shift immediate
7690  if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
7691  unsigned Opc =
7692  (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
7693  return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
7694  DAG.getConstant(Cnt, DL, MVT::i32));
7695  }
7696 
7697  // Right shift register. Note, there is not a shift right register
7698  // instruction, but the shift left register instruction takes a signed
7699  // value, where negative numbers specify a right shift.
7700  unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
7701  : Intrinsic::aarch64_neon_ushl;
7702  // negate the shift amount
7703  SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1));
7704  SDValue NegShiftLeft =
7705  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
7706  DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
7707  NegShift);
7708  return NegShiftLeft;
7709  }
7710 
7711  return SDValue();
7712 }
7713 
7714 static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
7715  AArch64CC::CondCode CC, bool NoNans, EVT VT,
7716  const SDLoc &dl, SelectionDAG &DAG) {
7717  EVT SrcVT = LHS.getValueType();
7718  assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
7719  "function only supposed to emit natural comparisons");
7720 
7721  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
7722  APInt CnstBits(VT.getSizeInBits(), 0);
7723  APInt UndefBits(VT.getSizeInBits(), 0);
7724  bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
7725  bool IsZero = IsCnst && (CnstBits == 0);
7726 
7727  if (SrcVT.getVectorElementType().isFloatingPoint()) {
7728  switch (CC) {
7729  default:
7730  return SDValue();
7731  case AArch64CC::NE: {
7732  SDValue Fcmeq;
7733  if (IsZero)
7734  Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
7735  else
7736  Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
7737  return DAG.getNode(AArch64ISD::NOT, dl, VT, Fcmeq);
7738  }
7739  case AArch64CC::EQ:
7740  if (IsZero)
7741  return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
7742  return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
7743  case AArch64CC::GE:
7744  if (IsZero)
7745  return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
7746  return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
7747  case AArch64CC::GT:
7748  if (IsZero)
7749  return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
7750  return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
7751  case AArch64CC::LS:
7752  if (IsZero)
7753  return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
7754  return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
7755  case AArch64CC::LT:
7756  if (!NoNans)
7757  return SDValue();
7758  // If we ignore NaNs then we can use the MI implementation.
7759  LLVM_FALLTHROUGH;
7760  case AArch64CC::MI:
7761  if (IsZero)
7762  return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
7763  return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
7764  }
7765  }
7766 
7767  switch (CC) {
7768  default:
7769  return SDValue();
7770  case AArch64CC::NE: {
7771  SDValue Cmeq;
7772  if (IsZero)
7773  Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
7774  else
7775  Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
7776  return DAG.getNode(AArch64ISD::NOT, dl, VT, Cmeq);
7777  }
7778  case AArch64CC::EQ:
7779  if (IsZero)
7780  return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
7781  return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
7782  case AArch64CC::GE:
7783  if (IsZero)
7784  return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
7785  return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
7786  case AArch64CC::GT:
7787  if (IsZero)
7788  return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
7789  return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
7790  case AArch64CC::LE:
7791  if (IsZero)
7792  return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
7793  return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
7794  case AArch64CC::LS:
7795  return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
7796  case AArch64CC::LO:
7797  return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
7798  case AArch64CC::LT:
7799  if (IsZero)
7800  return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
7801  return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
7802  case AArch64CC::HI:
7803  return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
7804  case AArch64CC::HS:
7805  return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
7806  }
7807 }
7808 
7809 SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
7810  SelectionDAG &DAG) const {
7811  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7812  SDValue LHS = Op.getOperand(0);
7813  SDValue RHS = Op.getOperand(1);
7814  EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
7815  SDLoc dl(Op);
7816 
7817  if (LHS.getValueType().getVectorElementType().isInteger()) {
7818  assert(LHS.getValueType() == RHS.getValueType());
7819  AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
7820  SDValue Cmp =
7821  EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
7822  return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
7823  }
7824 
7825  const bool FullFP16 =
7826  static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
7827 
7828  // Make v4f16 (only) fcmp operations utilise vector instructions
7829  // v8f16 support will be a little more complicated
7830  if (LHS.getValueType().getVectorElementType() == MVT::f16) {
7831  if (!FullFP16 && LHS.getValueType().getVectorNumElements() == 4) {
7832  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
7833  RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
7834  SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
7835  DAG.ReplaceAllUsesWith(Op, NewSetcc);
7836  CmpVT = MVT::v4i32;
7837  } else
7838  return SDValue();
7839  }
7840 
7841  assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
7842  LHS.getValueType().getVectorElementType() != MVT::f128);
7843 
7844  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
7845  // clean. Some of them require two branches to implement.
7846  AArch64CC::CondCode CC1, CC2;
7847  bool ShouldInvert;
7848  changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
7849 
7850  bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
7851  SDValue Cmp =
7852  EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
7853  if (!Cmp.getNode())
7854  return SDValue();
7855 
7856  if (CC2 != AArch64CC::AL) {
7857  SDValue Cmp2 =
7858  EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
7859  if (!Cmp2.getNode())
7860  return SDValue();
7861 
7862  Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
7863  }
7864 
7865  Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
7866 
7867  if (ShouldInvert)
7868  Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
7869 
7870  return Cmp;
7871 }
7872 
7873 static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
7874  SelectionDAG &DAG) {
7875  SDValue VecOp = ScalarOp.getOperand(0);
7876  auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);
7877  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,
7878  DAG.getConstant(0, DL, MVT::i64));
7879 }
7880 
7881 SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
7882  SelectionDAG &DAG) const {
7883  SDLoc dl(Op);
7884  switch (Op.getOpcode()) {
7885  case ISD::VECREDUCE_ADD:
7886  return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
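  // e.g. VECREDUCE_ADD of a v4i32 becomes UADDV, which models ADDV leaving the
  // sum in lane 0; getReductionSDNode then reads the scalar result back with
  // an EXTRACT_VECTOR_ELT of index 0.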
7887  case ISD::VECREDUCE_SMAX:
7888  return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
7889  case ISD::VECREDUCE_SMIN:
7890  return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
7891  case ISD::VECREDUCE_UMAX:
7892  return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
7893  case ISD::VECREDUCE_UMIN:
7894  return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
7895  case ISD::VECREDUCE_FMAX: {
7896  assert(Op->getFlags().hasNoNaNs() && "fmax vector reduction needs NoNaN flag");
7897  return DAG.getNode(
7898  ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
7899  DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
7900  Op.getOperand(0));
7901  }
7902  case ISD::VECREDUCE_FMIN: {
7903  assert(Op->getFlags().hasNoNaNs() && "fmin vector reduction needs NoNaN flag");
7904  return DAG.getNode(
7905  ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
7906  DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
7907  Op.getOperand(0));
7908  }
7909  default:
7910  llvm_unreachable("Unhandled reduction");
7911  }
7912 }
7913 
7914 SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
7915  SelectionDAG &DAG) const {
7916  auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
7917  if (!Subtarget.hasLSE())
7918  return SDValue();
7919 
7920  // LSE has an atomic load-add instruction, but not a load-sub.
7921  SDLoc dl(Op);
7922  MVT VT = Op.getSimpleValueType();
7923  SDValue RHS = Op.getOperand(2);
7924  AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
7925  RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
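  // i.e. "atomicrmw sub p, x" is rewritten as "atomicrmw add p, (0 - x)",
  // which the LSE path can select to LDADD.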
7926  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
7927  Op.getOperand(0), Op.getOperand(1), RHS,
7928  AN->getMemOperand());
7929 }
7930 
7931 SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
7932  SelectionDAG &DAG) const {
7933  auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
7934  if (!Subtarget.hasLSE())
7935  return SDValue();
7936 
7937  // LSE has an atomic load-clear instruction, but not a load-and.
7938  SDLoc dl(Op);
7939  MVT VT = Op.getSimpleValueType();
7940  SDValue RHS = Op.getOperand(2);
7941  AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
7942  RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS);
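  // i.e. "atomicrmw and p, x" is rewritten as an atomic load-clear of ~x,
  // which the LSE path can select to LDCLR.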
7943  return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(),
7944  Op.getOperand(0), Op.getOperand(1), RHS,
7945  AN->getMemOperand());
7946 }
7947 
7948 SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
7949  SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
7950  SDLoc dl(Op);
7951  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7952  SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);
7953 
7954  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
7955  const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
7956  if (Subtarget->hasCustomCallingConv())
7957  TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
7958 
7959  Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
7960  DAG.getConstant(4, dl, MVT::i64));
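  // __chkstk on Windows/AArch64 takes the requested allocation size in X15
  // measured in 16-byte units, hence the divide by 16 here and the scale back
  // up by 16 after the call below.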
7961  Chain = DAG.getCopyToReg(Chain, dl, AArch64::X15, Size, SDValue());
7962  Chain =
7963  DAG.getNode(AArch64ISD::CALL, dl, DAG.getVTList(MVT::Other, MVT::Glue),
7964  Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
7965  DAG.getRegisterMask(Mask), Chain.getValue(1));
7966  // To match the actual intent better, we should read the output from X15 here
7967  // again (instead of potentially spilling it to the stack), but rereading Size
7968  // from X15 here doesn't work at -O0, since it thinks that X15 is undefined
7969  // here.
7970 
7971  Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
7972  DAG.getConstant(4, dl, MVT::i64));
7973  return Chain;
7974 }
7975 
7976 SDValue
7977 AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7978  SelectionDAG &DAG) const {
7979  assert(Subtarget->isTargetWindows() &&
7980  "Only Windows alloca probing supported");
7981  SDLoc dl(Op);
7982  // Get the inputs.
7983  SDNode *Node = Op.getNode();
7984  SDValue Chain = Op.getOperand(0);
7985  SDValue Size = Op.getOperand(1);
7986  unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
7987  EVT VT = Node->getValueType(0);
7988 
7989  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
7990  "no-stack-arg-probe")) {
7991  SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
7992  Chain = SP.getValue(1);
7993  SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
7994  if (Align)
7995  SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
7996  DAG.getConstant(-(uint64_t)Align, dl, VT));
7997  Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
7998  SDValue Ops[2] = {SP, Chain};
7999  return DAG.getMergeValues(Ops, dl);
8000  }
8001 
8002  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
8003 
8004  Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);
8005 
8006  SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
8007  Chain = SP.getValue(1);
8008  SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
8009  if (Align)
8010  SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
8011  DAG.getConstant(-(uint64_t)Align, dl, VT));
8012  Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
8013 
8014  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
8015  DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
8016 
8017  SDValue Ops[2] = {SP, Chain};
8018  return DAG.getMergeValues(Ops, dl);
8019 }
8020 
8021 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
8022 /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
8023 /// specified in the intrinsic calls.
8024 bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
8025  const CallInst &I,
8026  MachineFunction &MF,
8027  unsigned Intrinsic) const {
8028  auto &DL = I.getModule()->getDataLayout();
8029  switch (Intrinsic) {
8030  case Intrinsic::aarch64_neon_ld2:
8031  case Intrinsic::aarch64_neon_ld3:
8032  case Intrinsic::aarch64_neon_ld4:
8033  case Intrinsic::aarch64_neon_ld1x2:
8034  case Intrinsic::aarch64_neon_ld1x3:
8035  case Intrinsic::aarch64_neon_ld1x4:
8036  case Intrinsic::aarch64_neon_ld2lane:
8037  case Intrinsic::aarch64_neon_ld3lane:
8038  case Intrinsic::aarch64_neon_ld4lane:
8039  case Intrinsic::aarch64_neon_ld2r:
8040  case Intrinsic::aarch64_neon_ld3r:
8041  case Intrinsic::aarch64_neon_ld4r: {
8042  Info.opc = ISD::INTRINSIC_W_CHAIN;
8043  // Conservatively set memVT to the entire set of vectors loaded.
8044  uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
8045  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
8046  Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
8047  Info.offset = 0;
8048  Info.align = 0;
8049  // volatile loads with NEON intrinsics not supported
8050  Info.flags = MachineMemOperand::MOLoad;
8051  return true;
8052  }
8053  case Intrinsic::aarch64_neon_st2:
8054  case Intrinsic::aarch64_neon_st3:
8055  case Intrinsic::aarch64_neon_st4:
8056  case Intrinsic::aarch64_neon_st1x2:
8057  case Intrinsic::aarch64_neon_st1x3:
8058  case Intrinsic::aarch64_neon_st1x4:
8059  case Intrinsic::aarch64_neon_st2lane:
8060  case Intrinsic::aarch64_neon_st3lane:
8061  case Intrinsic::aarch64_neon_st4lane: {
8062  Info.opc = ISD::INTRINSIC_VOID;
8063  // Conservatively set memVT to the entire set of vectors stored.
8064  unsigned NumElts = 0;
8065  for (unsigned ArgI = 0, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
8066  Type *ArgTy = I.getArgOperand(ArgI)->getType();
8067  if (!ArgTy->isVectorTy())
8068  break;
8069  NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
8070  }
8071  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
8072  Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
8073  Info.offset = 0;
8074  Info.align = 0;
8075  // volatile stores with NEON intrinsics not supported
8076  Info.flags = MachineMemOperand::MOStore;
8077  return true;
8078  }
8079  case Intrinsic::aarch64_ldaxr:
8080  case Intrinsic::aarch64_ldxr: {
8081  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
8082  Info.opc = ISD::INTRINSIC_W_CHAIN;
8083  Info.memVT = MVT::getVT(PtrTy->getElementType());
8084  Info.ptrVal = I.getArgOperand(0);
8085  Info.offset = 0;
8086  Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
8087  Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
8088  return true;
8089  }
8090  case Intrinsic::aarch64_stlxr:
8091  case Intrinsic::aarch64_stxr: {
8092  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
8093  Info.opc = ISD::INTRINSIC_W_CHAIN;
8094  Info.memVT = MVT::getVT(PtrTy->getElementType());
8095  Info.ptrVal = I.getArgOperand(1);
8096  Info.offset = 0;
8097  Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
8098  Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
8099  return true;
8100  }
8101  case Intrinsic::aarch64_ldaxp:
8102  case Intrinsic::aarch64_ldxp:
8103  Info.opc = ISD::INTRINSIC_W_CHAIN;
8104  Info.memVT = MVT::i128;
8105  Info.ptrVal = I.getArgOperand(0);
8106  Info.offset = 0;
8107  Info.align = 16;
8108  Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
8109  return true;
8110  case Intrinsic::aarch64_stlxp:
8111  case Intrinsic::aarch64_stxp:
8112  Info.opc = ISD::INTRINSIC_W_CHAIN;
8113  Info.memVT = MVT::i128;
8114  Info.ptrVal = I.getArgOperand(2);
8115  Info.offset = 0;
8116  Info.align = 16;
8117  Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
8118  return true;
8119  default:
8120  break;
8121  }
8122 
8123  return false;
8124 }
8125 
8126 bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
8127  ISD::LoadExtType ExtTy,
8128  EVT NewVT) const {
8129  // TODO: This may be worth removing. Check regression tests for diffs.
8130  if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
8131  return false;
8132 
8133  // If we're reducing the load width in order to avoid having to use an extra
8134  // instruction to do the extension, then it's probably a good idea.
8135  if (ExtTy != ISD::NON_EXTLOAD)
8136  return true;
8137  // Don't reduce load width if it would prevent us from combining a shift into
8138  // the offset.
8139  MemSDNode *Mem = dyn_cast<MemSDNode>(Load);
8140  assert(Mem);
8141  const SDValue &Base = Mem->getBasePtr();
8142  if (Base.getOpcode() == ISD::ADD &&
8143  Base.getOperand(1).getOpcode() == ISD::SHL &&
8144  Base.getOperand(1).hasOneUse() &&
8145  Base.getOperand(1).getOperand(1).getOpcode() == ISD::Constant) {
8146  // The shift can be combined if it matches the size of the value being
8147  // loaded (and so reducing the width would make it not match).
8148  uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1);
8149  uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
8150  if (ShiftAmount == Log2_32(LoadBytes))
8151  return false;
8152  }
8153  // We have no reason to disallow reducing the load width, so allow it.
8154  return true;
8155 }
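// Illustrative sketch, not part of the original source: the kind of pattern the
// check above preserves. The shift amount on the index equals log2 of the
// access size, so it can typically be folded into a scaled addressing mode
// (e.g. an "ldr x0, [x0, x1, lsl #3]" style load); narrowing the load would
// break that match. The function name is hypothetical.
static uint64_t sketchIndexedLoad(const uint64_t *Base, uint64_t Idx) {
  // Address = Base + (Idx << 3); the load is 8 bytes wide, matching the shift.
  return Base[Idx];
}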
8156 
8157 // Truncations from 64-bit GPR to 32-bit GPR are free.
8158 bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
8159  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
8160  return false;
8161  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
8162  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
8163  return NumBits1 > NumBits2;
8164 }
8166  if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
8167  return false;
8168  unsigned NumBits1 = VT1.getSizeInBits();
8169  unsigned NumBits2 = VT2.getSizeInBits();
8170  return NumBits1 > NumBits2;
8171 }
8172 
8173 /// Check if it is profitable to hoist an instruction in then/else to if.
8174 /// Not profitable if I and its user can form an FMA instruction
8175 /// because we prefer FMSUB/FMADD.
8176 bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
8177  if (I->getOpcode() != Instruction::FMul)
8178  return true;
8179 
8180  if (!I->hasOneUse())
8181  return true;
8182 
8183  Instruction *User = I->user_back();
8184 
8185  if (User &&
8186  !(User->getOpcode() == Instruction::FSub ||
8187  User->getOpcode() == Instruction::FAdd))
8188  return true;
8189 
8190  const TargetOptions &Options = getTargetMachine().Options;
8191  const DataLayout &DL = I->getModule()->getDataLayout();
8192  EVT VT = getValueType(DL, User->getOperand(0)->getType());
8193 
8194  return !(isFMAFasterThanFMulAndFAdd(VT) &&
8196  (Options.AllowFPOpFusion == FPOpFusion::Fast ||
8197  Options.UnsafeFPMath));
8198 }
8199 
8200 // All 32-bit GPR operations implicitly zero the high-half of the corresponding
8201 // 64-bit GPR.
8203  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
8204  return false;
8205  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
8206  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
8207  return NumBits1 == 32 && NumBits2 == 64;
8208 }
8210  if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
8211  return false;
8212  unsigned NumBits1 = VT1.getSizeInBits();
8213  unsigned NumBits2 = VT2.getSizeInBits();
8214  return NumBits1 == 32 && NumBits2 == 64;
8215 }
8216 
8218  EVT VT1 = Val.getValueType();
8219  if (isZExtFree(VT1, VT2)) {
8220  return true;
8221  }
8222 
8223  if (Val.getOpcode() != ISD::LOAD)
8224  return false;
8225 
8226  // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
8227  return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
8228  VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
8229  VT1.getSizeInBits() <= 32);
8230 }
8231 
8232 bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
8233  if (isa<FPExtInst>(Ext))
8234  return false;
8235 
8236  // Vector types are not free.
8237  if (Ext->getType()->isVectorTy())
8238  return false;
8239 
8240  for (const Use &U : Ext->uses()) {
8241  // The extension is free if we can fold it with a left shift in an
8242  // addressing mode or an arithmetic operation: add, sub, and cmp.
8243 
8244  // Is there a shift?
8245  const Instruction *Instr = cast<Instruction>(U.getUser());
8246 
8247  // Is this a constant shift?
8248  switch (Instr->getOpcode()) {
8249  case Instruction::Shl:
8250  if (!isa<ConstantInt>(Instr->getOperand(1)))
8251  return false;
8252  break;
8253  case Instruction::GetElementPtr: {
8254  gep_type_iterator GTI = gep_type_begin(Instr);
8255  auto &DL = Ext->getModule()->getDataLayout();
8256  std::advance(GTI, U.getOperandNo()-1);
8257  Type *IdxTy = GTI.getIndexedType();
8258  // This extension will end up with a shift because of the scaling factor.
8259  // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
8260  // Get the shift amount based on the scaling factor:
8261  // log2(sizeof(IdxTy)) - log2(8).
8262  uint64_t ShiftAmt =
8263  countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy)) - 3;
8264  // Is the constant foldable in the shift of the addressing mode?
8265  // I.e., shift amount is between 1 and 4 inclusive.
8266  if (ShiftAmt == 0 || ShiftAmt > 4)
8267  return false;
8268  break;
8269  }
8270  case Instruction::Trunc:
8271  // Check if this is a noop.
8272  // trunc(sext ty1 to ty2) to ty1.
8273  if (Instr->getType() == Ext->getOperand(0)->getType())
8274  continue;
8276  default:
8277  return false;
8278  }
8279 
8280  // At this point we can use the bfm family, so this extension is free
8281  // for that use.
8282  }
8283  return true;
8284 }
8285 
8287  unsigned &RequiredAligment) const {
8288  if (!LoadedType.isSimple() ||
8289  (!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
8290  return false;
8291  // Cyclone supports unaligned accesses.
8292  RequiredAligment = 0;
8293  unsigned NumBits = LoadedType.getSizeInBits();
8294  return NumBits == 32 || NumBits == 64;
8295 }
8296 
8297 /// A helper function for determining the number of interleaved accesses we
8298 /// will generate when lowering accesses of the given type.
8299 unsigned
8301  const DataLayout &DL) const {
8302  return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
8303 }
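// Illustrative sketch, not part of the original source: the expression above is
// a ceiling division by 128, i.e. the number of 128-bit NEON accesses needed to
// cover the vector type. The helper name is hypothetical.
static unsigned sketchNumInterleavedAccesses(unsigned VecSizeInBits) {
  // Examples: 64 -> 1, 128 -> 1, 192 -> 2, 256 -> 2, 384 -> 3.
  return (VecSizeInBits + 127) / 128;
}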
8304 
8307  if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
8308  I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr)
8309  return MOStridedAccess;
8311 }
8312 
8314  VectorType *VecTy, const DataLayout &DL) const {
8315 
8316  unsigned VecSize = DL.getTypeSizeInBits(VecTy);
8317  unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
8318 
8319  // Ensure the number of vector elements is greater than 1.
8320  if (VecTy->getNumElements() < 2)
8321  return false;
8322 
8323  // Ensure the element type is legal.
8324  if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
8325  return false;
8326 
8327  // Ensure the total vector size is 64 or a multiple of 128. Types larger than
8328  // 128 will be split into multiple interleaved accesses.
8329  return VecSize == 64 || VecSize % 128 == 0;
8330 }
8331 
8332 /// Lower an interleaved load into a ldN intrinsic.
8333 ///
8334 /// E.g. Lower an interleaved load (Factor = 2):
8335 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
8336 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
8337 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
8338 ///
8339 /// Into:
8340 /// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
8341 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
8342 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
8345  ArrayRef<unsigned> Indices, unsigned Factor) const {
8346  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
8347  "Invalid interleave factor");
8348  assert(!Shuffles.empty() && "Empty shufflevector input");
8349  assert(Shuffles.size() == Indices.size() &&
8350  "Unmatched number of shufflevectors and indices");
8351 
8352  const DataLayout &DL = LI->getModule()->getDataLayout();
8353 
8354  VectorType *VecTy = Shuffles[0]->getType();
8355 
8356  // Skip if we do not have NEON and skip illegal vector types. We can
8357  // "legalize" wide vector types into multiple interleaved accesses as long as
8358  // the vector types are divisible by 128.
8359  if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
8360  return false;
8361 
8362  unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
8363 
8364  // A pointer vector cannot be the return type of the ldN intrinsics. Need to
8365  // load integer vectors first and then convert to pointer vectors.
8366  Type *EltTy = VecTy->getVectorElementType();
8367  if (EltTy->isPointerTy())
8368  VecTy =
8369  VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
8370 
8371  IRBuilder<> Builder(LI);
8372 
8373  // The base address of the load.
8374  Value *BaseAddr = LI->getPointerOperand();
8375 
8376  if (NumLoads > 1) {
8377  // If we're going to generate more than one load, reset the sub-vector type
8378  // to something legal.
8379  VecTy = VectorType::get(VecTy->getVectorElementType(),
8380  VecTy->getVectorNumElements() / NumLoads);
8381 
8382  // We will compute the pointer operand of each load from the original base
8383  // address using GEPs. Cast the base address to a pointer to the scalar
8384  // element type.
8385  BaseAddr = Builder.CreateBitCast(
8386  BaseAddr, VecTy->getVectorElementType()->getPointerTo(
8387  LI->getPointerAddressSpace()));
8388  }
8389 
8390  Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
8391  Type *Tys[2] = {VecTy, PtrTy};
8392  static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
8395  Function *LdNFunc =
8396  Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
8397 
8398  // Holds sub-vectors extracted from the load intrinsic return values. The
8399  // sub-vectors are associated with the shufflevector instructions they will
8400  // replace.
8402 
8403  for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
8404 
8405  // If we're generating more than one load, compute the base address of
8406  // subsequent loads as an offset from the previous.
8407  if (LoadCount > 0)
8408  BaseAddr = Builder.CreateConstGEP1_32(
8409  BaseAddr, VecTy->getVectorNumElements() * Factor);
8410 
8411  CallInst *LdN = Builder.CreateCall(
8412  LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
8413 
8414  // Extract and store the sub-vectors returned by the load intrinsic.
8415  for (unsigned i = 0; i < Shuffles.size(); i++) {
8416  ShuffleVectorInst *SVI = Shuffles[i];
8417  unsigned Index = Indices[i];
8418 
8419  Value *SubVec = Builder.CreateExtractValue(LdN, Index);
8420 
8421  // Convert the integer vector to a pointer vector if the element type is a pointer.
8422  if (EltTy->isPointerTy())
8423  SubVec = Builder.CreateIntToPtr(
8424  SubVec, VectorType::get(SVI->getType()->getVectorElementType(),
8425  VecTy->getVectorNumElements()));
8426  SubVecs[SVI].push_back(SubVec);
8427  }
8428  }
8429 
8430  // Replace uses of the shufflevector instructions with the sub-vectors
8431  // returned by the load intrinsic. If a shufflevector instruction is
8432  // associated with more than one sub-vector, those sub-vectors will be
8433  // concatenated into a single wide vector.
8434  for (ShuffleVectorInst *SVI : Shuffles) {
8435  auto &SubVec = SubVecs[SVI];
8436  auto *WideVec =
8437  SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
8438  SVI->replaceAllUsesWith(WideVec);
8439  }
8440 
8441  return true;
8442 }
8443 
8444 /// Lower an interleaved store into a stN intrinsic.
8445 ///
8446 /// E.g. Lower an interleaved store (Factor = 3):
8447 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
8448 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
8449 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
8450 ///
8451 /// Into:
8452 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
8453 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
8454 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
8455 /// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
8456 ///
8457 /// Note that the new shufflevectors will be removed and we'll only generate one
8458 /// st3 instruction in CodeGen.
8459 ///
8460 /// Example for a more general valid mask (Factor 3). Lower:
8461 /// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
8462 /// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
8463 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
8464 ///
8465 /// Into:
8466 /// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
8467 /// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
8468 /// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
8469 /// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
8471  ShuffleVectorInst *SVI,
8472  unsigned Factor) const {
8473  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
8474  "Invalid interleave factor");
8475 
8476  VectorType *VecTy = SVI->getType();
8477  assert(VecTy->getVectorNumElements() % Factor == 0 &&
8478  "Invalid interleaved store");
8479 
8480  unsigned LaneLen = VecTy->getVectorNumElements() / Factor;
8481  Type *EltTy = VecTy->getVectorElementType();
8482  VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
8483 
8484  const DataLayout &DL = SI->getModule()->getDataLayout();
8485 
8486  // Skip if we do not have NEON and skip illegal vector types. We can
8487  // "legalize" wide vector types into multiple interleaved accesses as long as
8488  // the vector types are divisible by 128.
8489  if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
8490  return false;
8491 
8492  unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
8493 
8494  Value *Op0 = SVI->getOperand(0);
8495  Value *Op1 = SVI->getOperand(1);
8496  IRBuilder<> Builder(SI);
8497 
8498  // StN intrinsics don't support pointer vectors as arguments. Convert pointer
8499  // vectors to integer vectors.
8500  if (EltTy->isPointerTy()) {
8501  Type *IntTy = DL.getIntPtrType(EltTy);
8502  unsigned NumOpElts = Op0->getType()->getVectorNumElements();
8503 
8504  // Convert to the corresponding integer vector.
8505  Type *IntVecTy = VectorType::get(IntTy, NumOpElts);
8506  Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
8507  Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
8508 
8509  SubVecTy = VectorType::get(IntTy, LaneLen);
8510  }
8511 
8512  // The base address of the store.
8513  Value *BaseAddr = SI->getPointerOperand();
8514 
8515  if (NumStores > 1) {
8516  // If we're going to generate more than one store, reset the lane length
8517  // and sub-vector type to something legal.
8518  LaneLen /= NumStores;
8519  SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen);
8520 
8521  // We will compute the pointer operand of each store from the original base
8522  // address using GEPs. Cast the base address to a pointer to the scalar
8523  // element type.
8524  BaseAddr = Builder.CreateBitCast(
8525  BaseAddr, SubVecTy->getVectorElementType()->getPointerTo(
8526  SI->getPointerAddressSpace()));
8527  }
8528 
8529  auto Mask = SVI->getShuffleMask();
8530 
8531  Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
8532  Type *Tys[2] = {SubVecTy, PtrTy};
8533  static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
8536  Function *StNFunc =
8537  Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
8538 
8539  for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
8540 
8542 
8543  // Split the shufflevector operands into sub vectors for the new stN call.
8544  for (unsigned i = 0; i < Factor; i++) {
8545  unsigned IdxI = StoreCount * LaneLen * Factor + i;
8546  if (Mask[IdxI] >= 0) {
8547  Ops.push_back(Builder.CreateShuffleVector(
8548  Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0)));
8549  } else {
8550  unsigned StartMask = 0;
8551  for (unsigned j = 1; j < LaneLen; j++) {
8552  unsigned IdxJ = StoreCount * LaneLen * Factor + j;
8553  if (Mask[IdxJ * Factor + IdxI] >= 0) {
8554  StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
8555  break;
8556  }
8557  }
8558  // Note: Filling undef gaps with arbitrary elements is OK, since
8559  // those elements were being written anyway (with undefs).
8560  // If the mask is all undefs, we default to using elements from 0.
8561  // Note: StartMask cannot be negative; that is checked in
8562  // isReInterleaveMask.
8563  Ops.push_back(Builder.CreateShuffleVector(
8564  Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
8565  }
8566  }
8567 
8568  // If we're generating more than one store, we compute the base address of
8569  // subsequent stores as an offset from the previous one.
8570  if (StoreCount > 0)
8571  BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor);
8572 
8573  Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
8574  Builder.CreateCall(StNFunc, Ops);
8575  }
8576  return true;
8577 }
8578 
8579 static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
8580  unsigned AlignCheck) {
8581  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
8582  (DstAlign == 0 || DstAlign % AlignCheck == 0));
8583 }
8584 
8585 EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
8586  unsigned SrcAlign, bool IsMemset,
8587  bool ZeroMemset,
8588  bool MemcpyStrSrc,
8589  MachineFunction &MF) const {
8590  const Function &F = MF.getFunction();
8591  bool CanImplicitFloat = !F.hasFnAttribute(Attribute::NoImplicitFloat);
8592  bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
8593  bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
8594  // Only use AdvSIMD to implement memsets of 32 bytes and above. It would have
8595  // taken one instruction to materialize the v2i64 zero and one store (with
8596  // restrictive addressing mode). Just do i64 stores.
8597  bool IsSmallMemset = IsMemset && Size < 32;
8598  auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
8599  if (memOpAlign(SrcAlign, DstAlign, AlignCheck))
8600  return true;
8601  bool Fast;
8602  return allowsMisalignedMemoryAccesses(VT, 0, 1, &Fast) && Fast;
8603  };
8604 
8605  if (CanUseNEON && IsMemset && !IsSmallMemset &&
8606  AlignmentIsAcceptable(MVT::v2i64, 16))
8607  return MVT::v2i64;
8608  if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
8609  return MVT::f128;
8610  if (Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
8611  return MVT::i64;
8612  if (Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
8613  return MVT::i32;
8614  return MVT::Other;
8615 }
8616 
8617 // 12-bit optionally shifted immediates are legal for adds.
8618 bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
8619  if (Immed == std::numeric_limits<int64_t>::min()) {
8620  LLVM_DEBUG(dbgs() << "Illegal add imm " << Immed
8621  << ": avoid UB for INT64_MIN\n");
8622  return false;
8623  }
8624  // Same encoding for add/sub, just flip the sign.
8625  Immed = std::abs(Immed);
8626  bool IsLegal = ((Immed >> 12) == 0 ||
8627  ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
8628  LLVM_DEBUG(dbgs() << "Is " << Immed
8629  << " legal add imm: " << (IsLegal ? "yes" : "no") << "\n");
8630  return IsLegal;
8631 }
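// Illustrative sketch, not part of the original source: a standalone form of the
// legality test above, relying only on <cstdint>/<cstdlib>, which this file
// already includes. ADD/SUB accept a 12-bit unsigned immediate, optionally
// shifted left by 12 bits. The helper name is hypothetical.
static bool sketchIsLegalAddImm(int64_t Immed) {
  if (Immed == INT64_MIN) // std::abs would overflow on this value.
    return false;
  uint64_t U = static_cast<uint64_t>(std::abs(Immed));
  return (U >> 12) == 0 ||                      // fits in the low 12 bits, or
         ((U & 0xfff) == 0 && (U >> 24) == 0);  // 12 bits shifted left by 12.
}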
8632 
8633 // Integer comparisons are implemented with ADDS/SUBS, so the range of valid
8634 // immediates is the same as for an add or a sub.
8636  return isLegalAddImmediate(Immed);
8637 }
8638 
8639 /// isLegalAddressingMode - Return true if the addressing mode represented
8640 /// by AM is legal for this target, for a load/store of the specified type.
8641 bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
8642  const AddrMode &AM, Type *Ty,
8643  unsigned AS, Instruction *I) const {
8644  // AArch64 has five basic addressing modes:
8645  // reg
8646  // reg + 9-bit signed offset
8647  // reg + SIZE_IN_BYTES * 12-bit unsigned offset
8648  // reg1 + reg2
8649  // reg + SIZE_IN_BYTES * reg
8650 
8651  // No global is ever allowed as a base.
8652  if (AM.BaseGV)
8653  return false;
8654 
8655  // No reg+reg+imm addressing.
8656  if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
8657  return false;
8658 
8659  // check reg + imm case:
8660  // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
8661  uint64_t NumBytes = 0;
8662  if (Ty->isSized()) {
8663  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
8664  NumBytes = NumBits / 8;
8665  if (!isPowerOf2_64(NumBits))
8666  NumBytes = 0;
8667  }
8668 
8669  if (!AM.Scale) {
8670  int64_t Offset = AM.BaseOffs;
8671 
8672  // 9-bit signed offset
8673  if (isInt<9>(Offset))
8674  return true;
8675 
8676  // 12-bit unsigned offset
8677  unsigned shift = Log2_64(NumBytes);
8678  if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
8679  // Must be a multiple of NumBytes (NumBytes is a power of 2)
8680  (Offset >> shift) << shift == Offset)
8681  return true;
8682  return false;
8683  }
8684 
8685  // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
8686 
8687  return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes);
8688 }
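// Illustrative sketch, not part of the original source: the register+immediate
// part of the check above, written standalone. An offset is legal if it fits a
// signed 9-bit field, or if it is a non-negative multiple of the access size
// whose scaled value fits a 12-bit unsigned field. The helper name is
// hypothetical.
static bool sketchIsLegalRegImmOffset(int64_t Offset, uint64_t AccessBytes) {
  if (Offset >= -256 && Offset <= 255) // 9-bit signed offset.
    return true;
  if (AccessBytes == 0 || Offset < 0 || Offset % (int64_t)AccessBytes != 0)
    return false;
  return (Offset / (int64_t)AccessBytes) <= 4095; // 12-bit unsigned, scaled.
}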
8689 
8691  // Consider splitting large offset of struct or array.
8692  return true;
8693 }
8694 
8696  const AddrMode &AM, Type *Ty,
8697  unsigned AS) const {
8698  // Scaling factors are not free at all.
8699  // Operands | Rt Latency
8700  // -------------------------------------------
8701  // Rt, [Xn, Xm] | 4
8702  // -------------------------------------------
8703  // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
8704  // Rt, [Xn, Wm, <extend> #imm] |
8705  if (isLegalAddressingMode(DL, AM, Ty, AS))
8706  // Scale represents reg2 * scale, thus account for 1 if
8707  // it is not equal to 0 or 1.
8708  return AM.Scale != 0 && AM.Scale != 1;
8709  return -1;
8710 }
8711 
8713  VT = VT.getScalarType();
8714 
8715  if (!VT.isSimple())
8716  return false;
8717 
8718  switch (VT.getSimpleVT().SimpleTy) {
8719  case MVT::f32:
8720  case MVT::f64:
8721  return true;
8722  default:
8723  break;
8724  }
8725 
8726  return false;
8727 }
8728 
8729 const MCPhysReg *
8731  // LR is a callee-save register, but we must treat it as clobbered by any call
8732  // site. Hence we include LR in the scratch registers, which are in turn added
8733  // as implicit-defs for stackmaps and patchpoints.
8734  static const MCPhysReg ScratchRegs[] = {
8735  AArch64::X16, AArch64::X17, AArch64::LR, 0
8736  };
8737  return ScratchRegs;
8738 }
8739 
8740 bool
8742  CombineLevel Level) const {
8743  N = N->getOperand(0).getNode();
8744  EVT VT = N->getValueType(0);
8745  // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
8746  // it with shift to let it be lowered to UBFX.
8747  if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
8748  isa<ConstantSDNode>(N->getOperand(1))) {
8749  uint64_t TruncMask = N->getConstantOperandVal(1);
8750  if (isMask_64(TruncMask) &&
8751  N->getOperand(0).getOpcode() == ISD::SRL &&
8752  isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
8753  return false;
8754  }
8755  return true;
8756 }
8757 
8759  Type *Ty) const {
8760  assert(Ty->isIntegerTy());
8761 
8762  unsigned BitSize = Ty->getPrimitiveSizeInBits();
8763  if (BitSize == 0)
8764  return false;
8765 
8766  int64_t Val = Imm.getSExtValue();
8767  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize))
8768  return true;
8769 
8770  if ((int64_t)Val < 0)
8771  Val = ~Val;
8772  if (BitSize == 32)
8773  Val &= (1LL << 32) - 1;
8774 
8775  unsigned LZ = countLeadingZeros((uint64_t)Val);
8776  unsigned Shift = (63 - LZ) / 16;
8777  // MOVZ is free so return true for one or fewer MOVK.
8778  return Shift < 3;
8779 }
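// Illustrative sketch, not part of the original source: a standalone version of
// the chunk computation above. It returns the index of the highest 16-bit chunk
// containing a set bit, which bounds the number of MOVK instructions needed
// after the initial MOVZ; the code above accepts indices below 3. The helper
// name is hypothetical.
static unsigned sketchHighestChunkIndex(uint64_t V) {
  unsigned HighestSetBit = 0;
  for (unsigned I = 0; I < 64; ++I)
    if (V & (uint64_t(1) << I))
      HighestSetBit = I;
  return HighestSetBit / 16; // 0..3
}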
8780 
8782  unsigned Index) const {
8784  return false;
8785 
8786  return (Index == 0 || Index == ResVT.getVectorNumElements());
8787 }
8788 
8789 /// Turn vector tests of the signbit in the form of:
8790 /// xor (sra X, elt_size(X)-1), -1
8791 /// into:
8792 /// cmge X, X, #0
8794  const AArch64Subtarget *Subtarget) {
8795  EVT VT = N->getValueType(0);
8796  if (!Subtarget->hasNEON() || !VT.isVector())
8797  return SDValue();
8798 
8799  // There must be a shift right algebraic before the xor, and the xor must be a
8800  // 'not' operation.
8801  SDValue Shift = N->getOperand(0);
8802  SDValue Ones = N->getOperand(1);
8803  if (Shift.getOpcode() != AArch64ISD::VASHR || !Shift.hasOneUse() ||
8805  return SDValue();
8806 
8807  // The shift should be smearing the sign bit across each vector element.
8808  auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
8809  EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
8810  if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
8811  return SDValue();
8812 
8813  return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
8814 }
8815 
8816 // Generate SUBS and CSEL for integer abs.
8818  EVT VT = N->getValueType(0);
8819 
8820  SDValue N0 = N->getOperand(0);
8821  SDValue N1 = N->getOperand(1);
8822  SDLoc DL(N);
8823 
8824  // Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1)
8825  // and change it to SUB and CSEL.
8826  if (VT.isInteger() && N->getOpcode() == ISD::XOR &&
8827  N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
8828  N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0))
8829  if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
8830  if (Y1C->getAPIntValue() == VT.getSizeInBits() - 1) {
8831  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
8832  N0.getOperand(0));
8833  // Generate SUBS & CSEL.
8834  SDValue Cmp =
8835  DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
8836  N0.getOperand(0), DAG.getConstant(0, DL, VT));
8837  return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0.getOperand(0), Neg,
8839  SDValue(Cmp.getNode(), 1));
8840  }
8841  return SDValue();
8842 }
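// Illustrative sketch, not part of the original source: the scalar shape of the
// abs pattern matched above, ignoring the INT64_MIN corner case. With
// Y = X >> 63, the expression (X + Y) ^ Y equals |X| on two's-complement
// targets; the combine instead emits a subtract from zero plus a conditional
// select driven by the flags of a compare with zero. The helper name is
// hypothetical.
static int64_t sketchAbsViaSubCsel(int64_t X) {
  int64_t Neg = 0 - X;    // the SUBS result (also sets the flags).
  return X < 0 ? Neg : X; // the CSEL picks the negated value only when X < 0.
}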
8843 
8846  const AArch64Subtarget *Subtarget) {
8847  if (DCI.isBeforeLegalizeOps())
8848  return SDValue();
8849 
8850  if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget))
8851  return Cmp;
8852 
8853  return performIntegerAbsCombine(N, DAG);
8854 }
8855 
8856 SDValue
8857 AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
8858  SelectionDAG &DAG,
8859  SmallVectorImpl<SDNode *> &Created) const {
8861  if (isIntDivCheap(N->getValueType(0), Attr))
8862  return SDValue(N,0); // Lower SDIV as SDIV
8863 
8864  // fold (sdiv X, pow2)
8865  EVT VT = N->getValueType(0);
8866  if ((VT != MVT::i32 && VT != MVT::i64) ||
8867  !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
8868  return SDValue();
8869 
8870  SDLoc DL(N);
8871  SDValue N0 = N->getOperand(0);
8872  unsigned Lg2 = Divisor.countTrailingZeros();
8873  SDValue Zero = DAG.getConstant(0, DL, VT);
8874  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
8875 
8876  // Add (N0 < 0) ? Pow2 - 1 : 0;
8877  SDValue CCVal;
8878  SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
8879  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
8880  SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
8881 
8882  Created.push_back(Cmp.getNode());
8883  Created.push_back(Add.getNode());
8884  Created.push_back(CSel.getNode());
8885 
8886  // Divide by pow2.
8887  SDValue SRA =
8888  DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
8889 
8890  // If we're dividing by a positive value, we're done. Otherwise, we must
8891  // negate the result.
8892  if (Divisor.isNonNegative())
8893  return SRA;
8894 
8895  Created.push_back(SRA.getNode());
8896  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
8897 }
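// Illustrative sketch, not part of the original source: the scalar expansion
// built above for a signed division by +/-2^Lg2, ignoring the INT64_MIN corner
// case. Adding 2^Lg2 - 1 before the arithmetic shift when the dividend is
// negative gives round-toward-zero semantics; a negative divisor only needs a
// final negation. The helper name is hypothetical.
static int64_t sketchSDivByPow2(int64_t X, unsigned Lg2, bool NegativeDivisor) {
  int64_t Bias = (int64_t(1) << Lg2) - 1;
  int64_t Q = (X < 0 ? X + Bias : X) >> Lg2; // CSEL of X and X + Bias, then SRA.
  return NegativeDivisor ? -Q : Q;           // subtract from zero for -2^Lg2.
}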
8898 
8901  const AArch64Subtarget *Subtarget) {
8902  if (DCI.isBeforeLegalizeOps())
8903  return SDValue();
8904 
8905  // The below optimizations require a constant RHS.
8906  if (!isa<ConstantSDNode>(N->getOperand(1)))
8907  return SDValue();
8908 
8909  ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1));
8910  const APInt &ConstValue = C->getAPIntValue();
8911 
8912  // Multiplication by a power of two plus/minus one can be done more
8913  // cheaply as a shift+add/sub. For now, this is true for all subtargets. If
8914  // future CPUs have a cheaper MADD instruction, this may need to be
8915  // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
8916  // 64-bit is 5 cycles, so this is always a win.
8917  // More aggressively, some multiplications N0 * C can be lowered to
8918  // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
8919  // e.g. 6=3*2=(2+1)*2.
8920  // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
8921  // which equals (1+2)*16-(1+2).
8922  SDValue N0 = N->getOperand(0);
8923  // TrailingZeroes is used to test if the mul can be lowered to
8924  // shift+add+shift.
8925  unsigned TrailingZeroes = ConstValue.countTrailingZeros();
8926  if (TrailingZeroes) {
8927  // Conservatively do not lower to shift+add+shift if the mul might be
8928  // folded into smul or umul.
8929  if (N0->hasOneUse() && (isSignExtended(N0.getNode(), DAG) ||
8930  isZeroExtended(N0.getNode(), DAG)))
8931  return SDValue();
8932  // Conservatively do not lower to shift+add+shift if the mul might be
8933  // folded into madd or msub.
8934  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
8935  N->use_begin()->getOpcode() == ISD::SUB))
8936  return SDValue();
8937  }
8938  // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
8939  // and shift+add+shift.
8940  APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
8941 
8942  unsigned ShiftAmt, AddSubOpc;
8943  // Is the shifted value the LHS operand of the add/sub?
8944  bool ShiftValUseIsN0 = true;
8945  // Do we need to negate the result?
8946  bool NegateResult = false;
8947 
8948  if (ConstValue.isNonNegative()) {
8949  // (mul x, 2^N + 1) => (add (shl x, N), x)
8950  // (mul x, 2^N - 1) => (sub (shl x, N), x)
8951  // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
8952  APInt SCVMinus1 = ShiftedConstValue - 1;
8953  APInt CVPlus1 = ConstValue + 1;
8954  if (SCVMinus1.isPowerOf2()) {
8955  ShiftAmt = SCVMinus1.logBase2();
8956  AddSubOpc = ISD::ADD;
8957  } else if (CVPlus1.isPowerOf2()) {
8958  ShiftAmt = CVPlus1.logBase2();
8959  AddSubOpc = ISD::SUB;
8960  } else
8961  return SDValue();
8962  } else {
8963  // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
8964  // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
8965  APInt CVNegPlus1 = -ConstValue + 1;
8966  APInt CVNegMinus1 = -ConstValue - 1;
8967  if (CVNegPlus1.isPowerOf2()) {
8968  ShiftAmt = CVNegPlus1.logBase2();
8969  AddSubOpc = ISD::SUB;
8970  ShiftValUseIsN0 = false;
8971  } else if (CVNegMinus1.isPowerOf2()) {
8972  ShiftAmt = CVNegMinus1.logBase2();
8973  AddSubOpc = ISD::ADD;
8974  NegateResult = true;
8975  } else
8976  return SDValue();
8977  }
8978 
8979  SDLoc DL(N);
8980  EVT VT = N->getValueType(0);
8981  SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
8982  DAG.getConstant(ShiftAmt, DL, MVT::i64));
8983 
8984  SDValue AddSubN0 = ShiftValUseIsN0 ? ShiftedVal : N0;
8985  SDValue AddSubN1 = ShiftValUseIsN0 ? N0 : ShiftedVal;
8986  SDValue Res = DAG.getNode(AddSubOpc, DL, VT, AddSubN0, AddSubN1);
8987  assert(!(NegateResult && TrailingZeroes) &&
8988  "NegateResult and TrailingZeroes cannot both be true for now.");
8989  // Negate the result.
8990  if (NegateResult)
8991  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
8992  // Shift the result.
8993  if (TrailingZeroes)
8994  return DAG.getNode(ISD::SHL, DL, VT, Res,
8995  DAG.getConstant(TrailingZeroes, DL, MVT::i64));
8996  return Res;
8997 }
8998 
9000  SelectionDAG &DAG) {
9001  // Take advantage of vector comparisons producing 0 or -1 in each lane to
9002  // optimize away operation when it's from a constant.
9003  //
9004  // The general transformation is:
9005  // UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
9006  // AND(VECTOR_CMP(x,y), constant2)
9007  // constant2 = UNARYOP(constant)
9008 
9009  // Early exit if this isn't a vector operation, the operand of the
9010  // unary operation isn't a bitwise AND, or if the sizes of the operations
9011  // aren't the same.
9012  EVT VT = N->getValueType(0);
9013  if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
9014  N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
9015  VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
9016  return SDValue();
9017 
9018  // Now check that the other operand of the AND is a constant. We could
9019  // make the transformation for non-constant splats as well, but it's unclear
9020  // that would be a benefit as it would not eliminate any operations, just
9021  // perform one more step in scalar code before moving to the vector unit.
9022  if (BuildVectorSDNode *BV =
9023  dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
9024  // Bail out if the vector isn't a constant.
9025  if (!BV->isConstant())
9026  return SDValue();
9027 
9028  // Everything checks out. Build up the new and improved node.
9029  SDLoc DL(N);
9030  EVT IntVT = BV->getValueType(0);
9031  // Create a new constant of the appropriate type for the transformed
9032  // DAG.
9033  SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
9034  // The AND node needs bitcasts to/from an integer vector type around it.
9035  SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
9036  SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
9037  N->getOperand(0)->getOperand(0), MaskConst);
9038  SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
9039  return Res;
9040  }
9041 
9042  return SDValue();
9043 }
9044 
9046  const AArch64Subtarget *Subtarget) {
9047  // First try to optimize away the conversion when it's conditionally from
9048  // a constant. Vectors only.
9050  return Res;
9051 
9052  EVT VT = N->getValueType(0);
9053  if (VT != MVT::f32 && VT != MVT::f64)
9054  return SDValue();
9055 
9056  // Only optimize when the source and destination types have the same width.
9057  if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
9058  return SDValue();
9059 
9060  // If the result of an integer load is only used by an integer-to-float
9061  // conversion, use an FP load and an AdvSIMD scalar {S|U}CVTF instead.
9062  // This eliminates an "integer-to-vector-move" UOP and improves throughput.
9063  SDValue N0 = N->getOperand(0);
9064  if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
9065  // Do not change the width of a volatile load.
9066  !cast<LoadSDNode>(N0)->isVolatile()) {
9067  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9068  SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
9069  LN0->getPointerInfo(), LN0->getAlignment(),
9070  LN0->getMemOperand()->getFlags());
9071 
9072  // Make sure successors of the original load stay after it by updating them
9073  // to use the new Chain.
9074  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
9075 
9076  unsigned Opcode =
9078  return DAG.getNode(Opcode, SDLoc(N), VT, Load);
9079  }
9080 
9081  return SDValue();
9082 }
9083 
9084 /// Fold a floating-point multiply by power of two into floating-point to
9085 /// fixed-point conversion.
9088  const AArch64Subtarget *Subtarget) {
9089  if (!Subtarget->hasNEON())
9090  return SDValue();
9091 
9092  SDValue Op = N->getOperand(0);
9093  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
9094  Op.getOpcode() != ISD::FMUL)
9095  return SDValue();
9096 
9097  SDValue ConstVec = Op->getOperand(1);
9098  if (!isa<BuildVectorSDNode>(ConstVec))
9099  return SDValue();
9100 
9101  MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
9102  uint32_t FloatBits = FloatTy.getSizeInBits();
9103  if (FloatBits != 32 && FloatBits != 64)
9104  return SDValue();
9105 
9107  uint32_t IntBits = IntTy.getSizeInBits();
9108  if (IntBits != 16 && IntBits != 32 && IntBits != 64)
9109  return SDValue();
9110 
9111  // Avoid conversions where iN is larger than the float (e.g., float -> i64).
9112  if (IntBits > FloatBits)
9113  return SDValue();
9114 
9115  BitVector UndefElements;
9116  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
9117  int32_t Bits = IntBits == 64 ? 64 : 32;
9118  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
9119  if (C == -1 || C == 0 || C > Bits)
9120  return SDValue();
9121 
9122  MVT ResTy;
9123  unsigned NumLanes = Op.getValueType().getVectorNumElements();
9124  switch (NumLanes) {
9125  default:
9126  return SDValue();
9127  case 2:
9128  ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
9129  break;
9130  case 4:
9131  ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
9132  break;
9133  }
9134 
9135  if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
9136  return SDValue();
9137 
9138  assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
9139  "Illegal vector type after legalization");
9140 
9141  SDLoc DL(N);
9142  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
9143  unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
9145  SDValue FixConv =
9146  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
9147  DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
9148  Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
9149  // We can handle smaller integers by generating an extra trunc.
9150  if (IntBits < FloatBits)
9151  FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
9152 
9153  return FixConv;
9154 }
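// Illustrative sketch, not part of the original source: the scalar identity the
// combine above exploits. Multiplying by 2^C before a truncating float-to-int
// conversion is the same as converting to a fixed-point value with C fractional
// bits, which AArch64 can express as a single FCVTZS/FCVTZU with a #C operand.
// Assumes 0 < C < 31; the helper name is hypothetical.
static int32_t sketchToFixedPoint(float X, unsigned C) {
  // For C == 8 this yields the value of X with 8 fractional bits.
  return static_cast<int32_t>(X * static_cast<float>(1u << C));
}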
9155 
9156 /// Fold a floating-point divide by power of two into fixed-point to
9157 /// floating-point conversion.
9160  const AArch64Subtarget *Subtarget) {
9161  if (!Subtarget->hasNEON())
9162  return SDValue();
9163 
9164  SDValue Op = N->getOperand(0);
9165  unsigned Opc = Op->getOpcode();
9166  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
9167  !Op.getOperand(0).getValueType().isSimple() ||
9168  (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
9169  return SDValue();
9170 
9171  SDValue ConstVec = N->getOperand(1);
9172  if (!isa<BuildVectorSDNode>(ConstVec))
9173  return SDValue();
9174 
9176  int32_t IntBits = IntTy.getSizeInBits();
9177  if (IntBits != 16 && IntBits != 32 && IntBits != 64)
9178  return SDValue();
9179 
9180  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
9181  int32_t FloatBits = FloatTy.getSizeInBits();
9182  if (FloatBits != 32 && FloatBits != 64)
9183  return SDValue();
9184 
9185  // Avoid conversions where iN is larger than the float (e.g., i64 -> float).
9186  if (IntBits > FloatBits)
9187  return SDValue();
9188 
9189  BitVector UndefElements;
9190  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
9191  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
9192  if (C == -1 || C == 0 || C > FloatBits)
9193  return SDValue();
9194 
9195  MVT ResTy;
9196  unsigned NumLanes = Op.getValueType().getVectorNumElements();
9197  switch (NumLanes) {
9198  default:
9199  return SDValue();
9200  case 2:
9201  ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
9202  break;
9203  case 4:
9204  ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
9205  break;
9206  }
9207 
9208  if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
9209  return SDValue();
9210 
9211  SDLoc DL(N);
9212  SDValue ConvInput = Op.getOperand(0);
9213  bool IsSigned = Opc == ISD::SINT_TO_FP;
9214  if (IntBits < FloatBits)
9215  ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
9216  ResTy, ConvInput);
9217 
9218  unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
9220  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
9221  DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
9222  DAG.getConstant(C, DL, MVT::i32));
9223 }
9224 
9225 /// An EXTR instruction is made up of two shifts, ORed together. This helper
9226 /// searches for and classifies those shifts.
9227 static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
9228  bool &FromHi) {
9229  if (N.getOpcode() == ISD::SHL)
9230  FromHi = false;
9231  else if (N.getOpcode() == ISD::SRL)
9232  FromHi = true;
9233  else
9234  return false;
9235 
9236  if (!isa<ConstantSDNode>(N.getOperand(1)))
9237  return false;
9238 
9239  ShiftAmount = N->getConstantOperandVal(1);
9240  Src = N->getOperand(0);
9241  return true;
9242 }
9243 
9244 /// EXTR instruction extracts a contiguous chunk of bits from two existing
9245 /// registers viewed as a high/low pair. This function looks for the pattern:
9246 /// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it
9247 /// with an EXTR. Can't quite be done in TableGen because the two immediates
9248 /// aren't independent.
9251  SelectionDAG &DAG = DCI.DAG;
9252  SDLoc DL(N);
9253  EVT VT = N->getValueType(0);
9254 
9255  assert(N->getOpcode() == ISD::OR && "Unexpected root");
9256 
9257  if (VT != MVT::i32 && VT != MVT::i64)
9258  return SDValue();
9259 
9260  SDValue LHS;
9261  uint32_t ShiftLHS = 0;
9262  bool LHSFromHi = false;
9263  if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
9264  return SDValue();
9265 
9266  SDValue RHS;
9267  uint32_t ShiftRHS = 0;
9268  bool RHSFromHi = false;
9269  if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
9270  return SDValue();
9271 
9272  // If they're both trying to come from the high part of the register, they're
9273  // not really an EXTR.
9274  if (LHSFromHi == RHSFromHi)
9275  return SDValue();
9276 
9277  if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
9278  return SDValue();
9279 
9280  if (LHSFromHi) {
9281  std::swap(LHS, RHS);
9282  std::swap(ShiftLHS, ShiftRHS);
9283  }
9284 
9285  return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
9286  DAG.getConstant(ShiftRHS, DL, MVT::i64));
9287 }
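// Illustrative sketch, not part of the original source: the bit manipulation the
// EXTR pattern encodes. With ShiftLHS + ShiftRHS equal to the register width,
// the OR of the two shifts selects a contiguous 64-bit window from the 128-bit
// concatenation Hi:Lo, starting ShiftRHS bits into Lo. Assumes
// 0 < ShiftRHS < 64; the helper name is hypothetical.
static uint64_t sketchExtr64(uint64_t Hi, uint64_t Lo, unsigned ShiftRHS) {
  unsigned ShiftLHS = 64 - ShiftRHS; // the two immediates are not independent.
  return (Hi << ShiftLHS) | (Lo >> ShiftRHS);
}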
9288 
9291  EVT VT = N->getValueType(0);
9292  SelectionDAG &DAG = DCI.DAG;
9293  SDLoc DL(N);
9294 
9295  if (!VT.isVector())
9296  return SDValue();
9297 
9298  SDValue N0 = N->getOperand(0);
9299  if (N0.getOpcode() != ISD::AND)
9300  return SDValue();
9301 
9302  SDValue N1 = N->getOperand(1);
9303  if (N1.getOpcode() != ISD::AND)
9304  return SDValue();
9305 
9306  // We only have to look for constant vectors here since the general, variable
9307  // case can be handled in TableGen.
9308  unsigned Bits = VT.getScalarSizeInBits();
9309  uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
9310  for (int i = 1; i >= 0; --i)
9311  for (int j = 1; j >= 0; --j) {
9314  if (!BVN0 || !BVN1)
9315  continue;
9316 
9317  bool FoundMatch = true;
9318  for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
9320  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
9321  if (!CN0 || !CN1 ||
9322  CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
9323  FoundMatch = false;
9324  break;
9325  }
9326  }
9327 
9328  if (FoundMatch)
9329  return DAG.getNode(AArch64ISD::BSL, DL, VT, SDValue(BVN0, 0),
9330  N0->getOperand(1 - i), N1->getOperand(1 - j));
9331  }
9332 
9333  return SDValue();
9334 }
9335 
9337  const AArch64Subtarget *Subtarget) {
9338  // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
9339  SelectionDAG &DAG = DCI.DAG;
9340  EVT VT = N->getValueType(0);
9341 
9342  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
9343  return SDValue();
9344 
9345  if (SDValue Res = tryCombineToEXTR(N, DCI))
9346  return Res;
9347 
9348  if (SDValue Res = tryCombineToBSL(N, DCI))
9349  return Res;
9350 
9351  return SDValue();
9352 }
9353 
9356  SelectionDAG &DAG = DCI.DAG;
9357  EVT VT = N->getValueType(0);
9358  if (VT != MVT::i32 && VT != MVT::i64)
9359  return SDValue();
9360 
9361  // Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the
9362  // high 16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32)
9363  // to (rotr (bswap i64 x), 32), if the high 32-bits of x are zero.
9364  SDValue N0 = N->getOperand(0);
9365  if (N0.getOpcode() == ISD::BSWAP) {
9366  SDLoc DL(N);
9367  SDValue N1 = N->getOperand(1);
9368  SDValue N00 = N0.getOperand(0);
9369  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
9370  uint64_t ShiftAmt = C->getZExtValue();
9371  if (VT == MVT::i32 && ShiftAmt == 16 &&
9372  DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16)))
9373  return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
9374  if (VT == MVT::i64 && ShiftAmt == 32 &&
9375  DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32)))
9376  return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
9377  }
9378  }
9379  return SDValue();
9380 }
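// Illustrative sketch, not part of the original source: why the i32 case of the
// canonicalization above is sound. When the upper 16 bits of X are zero, the
// low 16 bits of bswap(X) are zero as well, so a logical shift right by 16 and
// a rotate right by 16 produce the same value; the rotate form then matches a
// halfword byte-reverse (REV16-style) pattern. The helper name is hypothetical.
static uint32_t sketchSrlOfBswap32(uint32_t X) { // assumes (X >> 16) == 0
  uint32_t Swapped = ((X & 0x000000ffu) << 24) | ((X & 0x0000ff00u) << 8) |
                     ((X & 0x00ff0000u) >> 8) | (X >> 24); // bswap32(X)
  return Swapped >> 16; // equal to rotr(Swapped, 16) since its low half is zero.
}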
9381 
9384  SelectionDAG &DAG) {
9385  // Wait 'til after everything is legalized to try this. That way we have
9386  // legal vector types and such.
9387  if (DCI.isBeforeLegalizeOps())
9388  return SDValue();
9389 
9390  // Remove extraneous bitcasts around an extract_subvector.
9391  // For example,
9392  // (v4i16 (bitconvert
9393  // (extract_subvector (v2i64 (bitconvert (v8i16 ...)), (i64 1)))))
9394  // becomes
9395  // (extract_subvector ((v8i16 ...), (i64 4)))
9396 
9397  // Only interested in 64-bit vectors as the ultimate result.
9398  EVT VT = N->getValueType(0);
9399  if (!VT.isVector())
9400  return SDValue();
9401  if (VT.getSimpleVT().getSizeInBits() != 64)
9402  return SDValue();
9403  // Is the operand an extract_subvector starting at the beginning or halfway
9404  // point of the vector? A low half may also come through as an
9405  // EXTRACT_SUBREG, so look for that, too.
9406  SDValue Op0 = N->getOperand(0);
9407  if (Op0->getOpcode() != ISD::EXTRACT_SUBVECTOR &&
9408  !(Op0->isMachineOpcode() &&
9409  Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG))
9410  return SDValue();
9411  uint64_t idx = cast<ConstantSDNode>(Op0->getOperand(1))->getZExtValue();
9412  if (Op0->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9413  if (Op0->getValueType(0).getVectorNumElements() != idx && idx != 0)
9414  return SDValue();
9415  } else if (Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG) {
9416  if (idx != AArch64::dsub)
9417  return SDValue();
9418  // The dsub reference is equivalent to a lane zero subvector reference.
9419  idx = 0;
9420  }
9421  // Look through the bitcast of the input to the extract.
9422  if (Op0->getOperand(0)->getOpcode() != ISD::BITCAST)
9423  return SDValue();
9424  SDValue Source = Op0->getOperand(0)->getOperand(0);
9425  // If the source type has twice the number of elements as our destination
9426  // type, we know this is an extract of the high or low half of the vector.
9427  EVT SVT = Source->getValueType(0);
9428  if (!SVT.isVector() ||
9429  SVT.getVectorNumElements() != VT.getVectorNumElements() * 2)
9430  return SDValue();
9431 
9432  LLVM_DEBUG(
9433  dbgs() << "aarch64-lower: bitcast extract_subvector simplification\n");
9434 
9435  // Create the simplified form to just extract the low or high half of the
9436  // vector directly rather than bothering with the bitcasts.
9437  SDLoc dl(N);
9438  unsigned NumElements = VT.getVectorNumElements();
9439  if (idx) {
9440  SDValue HalfIdx = DAG.getConstant(NumElements, dl, MVT::i64);
9441  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx);
9442  } else {
9443  SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, dl, MVT::i32);
9444  return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT,
9445  Source, SubReg),
9446  0);
9447  }
9448 }
9449 
9452  SelectionDAG &DAG) {
9453  SDLoc dl(N);
9454  EVT VT = N->getValueType(0);
9455  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
9456 
9457  // Optimize concat_vectors of truncated vectors, where the intermediate
9458  // type is illegal, to avoid said illegality, e.g.,
9459  // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
9460  // (v2i16 (truncate (v2i64)))))
9461  // ->
9462  // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
9463  // (v4i32 (bitcast (v2i64))),
9464  // <0, 2, 4, 6>)))
9465  // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
9466  // on both input and result type, so we might generate worse code.
9467  // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
9468  if (N->getNumOperands() == 2 &&
9469  N0->getOpcode() == ISD::TRUNCATE &&
9470  N1->getOpcode() == ISD::TRUNCATE) {
9471  SDValue N00 = N0->getOperand(0);
9472  SDValue N10 = N1->getOperand(0);
9473  EVT N00VT = N00.getValueType();
9474 
9475  if (N00VT == N10.getValueType() &&
9476  (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
9477  N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
9478  MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
9480  for (size_t i = 0; i < Mask.size(); ++i)
9481  Mask[i] = i * 2;
9482  return DAG.getNode(ISD::TRUNCATE, dl, VT,
9483  DAG.getVectorShuffle(
9484  MidVT, dl,
9485  DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
9486  DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
9487  }
9488  }
9489 
9490  // Wait 'til after everything is legalized to try this. That way we have
9491  // legal vector types and such.
9492  if (DCI.isBeforeLegalizeOps())
9493  return SDValue();
9494 
9495  // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
9496  // splat. The indexed instructions are going to be expecting a DUPLANE64, so
9497  // canonicalise to that.
9498  if (N0 == N1 && VT.getVectorNumElements() == 2) {
9499  assert(VT.getScalarSizeInBits() == 64);
9500  return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
9501  DAG.getConstant(0, dl, MVT::i64));
9502  }
9503 
9504  // Canonicalise concat_vectors so that the right-hand vector has as few
9505  // bit-casts as possible before its real operation. The primary matching
9506  // destination for these operations will be the narrowing "2" instructions,
9507  // which depend on the operation being performed on this right-hand vector.
9508  // For example,
9509  // (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS))))
9510  // becomes
9511  // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
9512 
9513  if (N1->getOpcode() != ISD::BITCAST)
9514  return SDValue();
9515  SDValue RHS = N1->getOperand(0);
9516  MVT RHSTy = RHS.getValueType().getSimpleVT();
9517  // If the RHS is not a vector, this is not the pattern we're looking for.
9518  if (!RHSTy.isVector())
9519  return SDValue();
9520 
9521  LLVM_DEBUG(
9522  dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
9523 
9524  MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
9525  RHSTy.getVectorNumElements() * 2);
9526  return DAG.getNode(ISD::BITCAST, dl, VT,
9527  DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
9528  DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
9529  RHS));
9530 }
9531 
9534  SelectionDAG &DAG) {
9535  // Wait until after everything is legalized to try this. That way we have
9536  // legal vector types and such.
9537  if (DCI.isBeforeLegalizeOps())
9538  return SDValue();
9539  // Transform a scalar conversion of a value from a lane extract into a
9540  // lane extract of a vector conversion. E.g., from foo1 to foo2:
9541  // double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); }
9542  // double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; }
9543  //
9544  // The second form interacts better with instruction selection and the
9545  // register allocator to avoid cross-class register copies that aren't
9546  // coalescable due to a lane reference.
9547 
9548  // Check the operand and see if it originates from a lane extract.
9549  SDValue Op1 = N->getOperand(1);
9550  if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
9551  // Yep, no additional predication needed. Perform the transform.
9552  SDValue IID = N->getOperand(0);
9553  SDValue Shift = N->getOperand(2);
9554  SDValue Vec = Op1.getOperand(0);
9555  SDValue Lane = Op1.getOperand(1);
9556  EVT ResTy = N->getValueType(0);
9557  EVT VecResTy;
9558  SDLoc DL(N);
9559 
9560  // The vector width should be 128 bits by the time we get here, even
9561  // if it started as 64 bits (the extract_vector handling will have
9562  // done so).
9563  assert(Vec.getValueSizeInBits() == 128 &&
9564  "unexpected vector size on extract_vector_elt!");
9565  if (Vec.getValueType() == MVT::v4i32)
9566  VecResTy = MVT::v4f32;
9567  else if (Vec.getValueType() == MVT::v2i64)
9568  VecResTy = MVT::v2f64;
9569  else
9570  llvm_unreachable("unexpected vector type!");
9571 
9572  SDValue Convert =
9573  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
9574  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
9575  }
9576  return SDValue();
9577 }
9578 
9579 // AArch64 high-vector "long" operations are formed by performing the non-high
9580 // version on an extract_subvector of each operand which gets the high half:
9581 //
9582 // (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS))
9583 //
9584 // However, there are cases which don't have an extract_high explicitly, but
9585 // have another operation that can be made compatible with one for free. For
9586 // example:
9587 //
9588 // (dupv64 scalar) --> (extract_high (dup128 scalar))
9589 //
9590 // This routine does the actual conversion of such DUPs, once outer routines
9591 // have determined that everything else is in order.
9592 // It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
9593 // similarly here.
9595  switch (N.getOpcode()) {
9596  case AArch64ISD::DUP:
9597  case AArch64ISD::DUPLANE8:
9598  case AArch64ISD::DUPLANE16:
9599  case AArch64ISD::DUPLANE32:
9600  case AArch64ISD::DUPLANE64:
9601  case AArch64ISD::MOVI:
9602  case AArch64ISD::MOVIshift:
9603  case AArch64ISD::MOVIedit:
9604  case AArch64ISD::MOVImsl:
9605  case AArch64ISD::MVNIshift:
9606  case AArch64ISD::MVNImsl:
9607  break;
9608  default:
9609  // FMOV could be supported, but isn't very useful, as it would only occur
9610  // if you passed a bitcast floating-point immediate to an eligible long
9611  // integer op (addl, smull, ...).
9612  return SDValue();
9613  }
9614 
9615  MVT NarrowTy = N.getSimpleValueType();
9616  if (!NarrowTy.is64BitVector())
9617  return SDValue();
9618 
9619  MVT ElementTy = NarrowTy.getVectorElementType();
9620  unsigned NumElems = NarrowTy.getVectorNumElements();
9621  MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
9622 
9623  SDLoc dl(N);
9624  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy,
9625  DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()),
9626  DAG.getConstant(NumElems, dl, MVT::i64));
9627 }
9628 
9630  if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
9631  return true;
9632 
9633  return N.getOpcode() == ISD::BITCAST &&
9635 }
9636 
9637 /// Helper structure to keep track of ISD::SET_CC operands.
9639  const SDValue *Opnd0;
9640  const SDValue *Opnd1;
9642 };
9643 
9644 /// Helper structure to keep track of a SET_CC lowered into AArch64 code.
9646  const SDValue *Cmp;
9648 };
9649 
9650 /// Helper structure to keep track of SetCC information.
9651 union SetCCInfo {
9654 };
9655 
9656 /// Helper structure to be able to read SetCC information. If the IsAArch64
9657 /// field is set to true, Info is an AArch64SetCCInfo; otherwise Info is a
9658 /// GenericSetCCInfo.
9662 };
9663 
9664 /// Check whether or not \p Op is a SET_CC operation, either a generic or an
9665 /// AArch64 lowered one.
9667 /// \p SetCCInfo is filled accordingly.
9668 /// \post SetCCInfo is meaningful only when this function returns true.
9669 /// \return True when Op is a kind of SET_CC operation.
9670 static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
9671  // If this is a setcc, this is straightforward.
9672  if (Op.getOpcode() == ISD::SETCC) {
9673  SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
9674  SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
9675  SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
9676  SetCCInfo.IsAArch64 = false;
9677  return true;
9678  }
9679  // Otherwise, check if this is a matching csel instruction.
9680  // In other words:
9681  // - csel 1, 0, cc
9682  // - csel 0, 1, !cc
9683  if (Op.getOpcode() != AArch64ISD::CSEL)
9684  return false;
9685  // Set the information about the operands.
9686  // TODO: we want the operands of the Cmp not the csel
9687  SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
9688  SetCCInfo.IsAArch64 = true;
9689  SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
9690  cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
9691 
9692  // Check that the operands match the constraints:
9693  // (1) Both operands must be constants.
9694  // (2) One must be 1 and the other must be 0.
9697 
9698  // Check (1).
9699  if (!TValue || !FValue)
9700  return false;
9701 
9702  // Check (2).
9703  if (!TValue->isOne()) {
9704  // Update the comparison when we are interested in !cc.
9705  std::swap(TValue, FValue);
9706  SetCCInfo.Info.AArch64.CC =
9708  }
9709  return TValue->isOne() && FValue->isNullValue();
9710 }
9711 
9712 // Returns true if Op is setcc or zext of setcc.
9714  if (isSetCC(Op, Info))
9715  return true;
9716  return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
9717  isSetCC(Op->getOperand(0), Info));
9718 }
9719 
9720 // The folding we want to perform is:
9721 // (add x, [zext] (setcc cc ...) )
9722 // -->
9723 // (csel x, (add x, 1), !cc ...)
9724 //
9725 // The latter will get matched to a CSINC instruction.
9726 static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
9727  assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
9728  SDValue LHS = Op->getOperand(0);
9729  SDValue RHS = Op->getOperand(1);
9730  SetCCInfoAndKind InfoAndKind;
9731 
9732  // If neither operand is a SET_CC, give up.
9733  if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) {
9734  std::swap(LHS, RHS);
9735  if (!isSetCCOrZExtSetCC(LHS, InfoAndKind))
9736  return SDValue();
9737  }
9738 
9739  // FIXME: This could be generalized to work for FP comparisons.
9740  EVT CmpVT = InfoAndKind.IsAArch64
9741  ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
9742  : InfoAndKind.Info.Generic.Opnd0->getValueType();
9743  if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
9744  return SDValue();
9745 
9746  SDValue CCVal;
9747  SDValue Cmp;
9748  SDLoc dl(Op);
9749  if (InfoAndKind.IsAArch64) {
9750  CCVal = DAG.getConstant(
9751  AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
9752  MVT::i32);
9753  Cmp = *InfoAndKind.Info.AArch64.Cmp;
9754  } else
9755  Cmp = getAArch64Cmp(*InfoAndKind.Info.Generic.Opnd0,
9756  *InfoAndKind.Info.Generic.Opnd1,
9757  ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, true),
9758  CCVal, DAG, dl);
9759 
9760  EVT VT = Op->getValueType(0);
9761  LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
9762  return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
9763 }
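// Illustrative sketch of the fold above (hypothetical IR and registers, not
// taken from a test case): for
//   %c = icmp eq i32 %a, %b
//   %z = zext i1 %c to i32
//   %r = add i32 %x, %z
// the combine builds (csel x, (add x, 1), ne, (cmp a, b)), which the CSINC
// patterns then match as roughly
//   cmp   w1, w2
//   csinc w0, w0, w0, ne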
9764 
9765 // The basic add/sub long vector instructions have variants with "2" on the end
9766 // which act on the high-half of their inputs. They are normally matched by
9767 // patterns like:
9768 //
9769 // (add (zeroext (extract_high LHS)),
9770 // (zeroext (extract_high RHS)))
9771 // -> uaddl2 vD, vN, vM
9772 //
9773 // However, if one of the extracts is something like a duplicate, this
9774 // instruction can still be used profitably. This function puts the DAG into a
9775 // more appropriate form for those patterns to trigger.
9776 static SDValue performAddSubLongCombine(SDNode *N,
9777  TargetLowering::DAGCombinerInfo &DCI,
9778  SelectionDAG &DAG) {
9779  if (DCI.isBeforeLegalizeOps())
9780  return SDValue();
9781 
9782  MVT VT = N->getSimpleValueType(0);
9783  if (!VT.is128BitVector()) {
9784  if (N->getOpcode() == ISD::ADD)
9785  return performSetccAddFolding(N, DAG);
9786  return SDValue();
9787  }
9788 
9789  // Make sure both branches are extended in the same way.
9790  SDValue LHS = N->getOperand(0);
9791  SDValue RHS = N->getOperand(1);
9792  if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
9793  LHS.getOpcode() != ISD::SIGN_EXTEND) ||
9794  LHS.getOpcode() != RHS.getOpcode())
9795  return SDValue();
9796 
9797  unsigned ExtType = LHS.getOpcode();
9798 
9799  // It's not worth doing if at least one of the inputs isn't already an
9800  // extract, but we don't know which it'll be so we have to try both.
9801  if (isEssentiallyExtractSubvector(LHS.getOperand(0))) {
9802  RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
9803  if (!RHS.getNode())
9804  return SDValue();
9805 
9806  RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
9807  } else if (isEssentiallyExtractSubvector(RHS.getOperand(0))) {
9808  LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
9809  if (!LHS.getNode())
9810  return SDValue();
9811 
9812  LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
9813  }
9814 
9815  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
9816 }
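// Illustrative sketch (hypothetical types): for a v4i32 add whose operands
// are (zext (extract_high %v8i16)) and (zext (dup i16 %s)), the DUP side is
// rewritten as extract_high of a v8i16 DUP, so the whole add can be matched
// as a single uaddl2 instead of an explicit high-lane extraction feeding a
// plain uaddl.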
9817 
9818 // Massage DAGs which we can use the high-half "long" operations on into
9819 // something isel will recognize better. E.g.
9820 //
9821 // (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
9822 // (aarch64_neon_umull (extract_high (v2i64 vec)))
9823 // (extract_high (v2i64 (dup128 scalar)))))
9824 //
9825 static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
9826  TargetLowering::DAGCombinerInfo &DCI,
9827  SelectionDAG &DAG) {
9828  if (DCI.isBeforeLegalizeOps())
9829  return SDValue();
9830 
9831  SDValue LHS = N->getOperand(1);
9832  SDValue RHS = N->getOperand(2);
9833  assert(LHS.getValueType().is64BitVector() &&
9834  RHS.getValueType().is64BitVector() &&
9835  "unexpected shape for long operation");
9836 
9837  // Either node could be a DUP, but it's not worth doing both of them (you'd
9838  // just as well use the non-high version) so look for a corresponding extract
9839  // operation on the other "wing".
9840  if (isEssentiallyExtractSubvector(LHS)) {
9841  RHS = tryExtendDUPToExtractHigh(RHS, DAG);
9842  if (!RHS.getNode())
9843  return SDValue();
9844  } else if (isEssentiallyExtractSubvector(RHS)) {
9845  LHS = tryExtendDUPToExtractHigh(LHS, DAG);
9846  if (!LHS.getNode())
9847  return SDValue();
9848  }
9849 
9850  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
9851  N->getOperand(0), LHS, RHS);
9852 }
9853 
9854 static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
9855  MVT ElemTy = N->getSimpleValueType(0).getScalarType();
9856  unsigned ElemBits = ElemTy.getSizeInBits();
9857 
9858  int64_t ShiftAmount;
9859  if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
9860  APInt SplatValue, SplatUndef;
9861  unsigned SplatBitSize;
9862  bool HasAnyUndefs;
9863  if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
9864  HasAnyUndefs, ElemBits) ||
9865  SplatBitSize != ElemBits)
9866  return SDValue();
9867 
9868  ShiftAmount = SplatValue.getSExtValue();
9869  } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
9870  ShiftAmount = CVN->getSExtValue();
9871  } else
9872  return SDValue();
9873 
9874  unsigned Opcode;
9875  bool IsRightShift;
9876  switch (IID) {
9877  default:
9878  llvm_unreachable("Unknown shift intrinsic");
9879  case Intrinsic::aarch64_neon_sqshl:
9880  Opcode = AArch64ISD::SQSHL_I;
9881  IsRightShift = false;
9882  break;
9883  case Intrinsic::aarch64_neon_uqshl:
9884  Opcode = AArch64ISD::UQSHL_I;
9885  IsRightShift = false;
9886  break;
9887  case Intrinsic::aarch64_neon_srshl:
9888  Opcode = AArch64ISD::SRSHR_I;
9889  IsRightShift = true;
9890  break;
9891  case Intrinsic::aarch64_neon_urshl:
9892  Opcode = AArch64ISD::URSHR_I;
9893  IsRightShift = true;
9894  break;
9895  case Intrinsic::aarch64_neon_sqshlu:
9896  Opcode = AArch64ISD::SQSHLU_I;
9897  IsRightShift = false;
9898  break;
9899  }
9900 
9901  if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
9902  SDLoc dl(N);
9903  return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
9904  DAG.getConstant(-ShiftAmount, dl, MVT::i32));
9905  } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
9906  SDLoc dl(N);
9907  return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
9908  DAG.getConstant(ShiftAmount, dl, MVT::i32));
9909  }
9910 
9911  return SDValue();
9912 }
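// Illustrative sketch (assuming the rounding-shift-left intrinsic form with a
// constant splat amount): a call along the lines of
//   @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %v, <4 x i32> <splat -3>)
// has ShiftAmount == -3; since that intrinsic maps to a right-shift node
// here, it is folded to URSHR_I with immediate 3, i.e. effectively
//   urshr v0.4s, v1.4s, #3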
9913 
9914 // The CRC32[BH] instructions ignore the high bits of their data operand. Since
9915 // the intrinsics must be legal and take an i32, this means there's almost
9916 // certainly going to be a zext in the DAG which we can eliminate.
9917 static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
9918  SDValue AndN = N->getOperand(2);
9919  if (AndN.getOpcode() != ISD::AND)
9920  return SDValue();
9921 
9922  ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1));
9923  if (!CMask || CMask->getZExtValue() != Mask)
9924  return SDValue();
9925 
9926  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32,
9927  N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
9928 }
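// Illustrative sketch (hypothetical IR): for
//   %t = and i32 %data, 255
//   %r = call i32 @llvm.aarch64.crc32b(i32 %acc, i32 %t)
// the 0xff mask matches the Mask parameter above, so the AND is dropped and
// CRC32B consumes %data directly; the instruction only reads the low byte
// anyway.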
9929 
9930 static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
9931  SelectionDAG &DAG) {
9932  SDLoc dl(N);
9933  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
9934  DAG.getNode(Opc, dl,
9935  N->getOperand(1).getSimpleValueType(),
9936  N->getOperand(1)),
9937  DAG.getConstant(0, dl, MVT::i64));
9938 }
9939 
9940 static SDValue performIntrinsicCombine(SDNode *N,
9941  TargetLowering::DAGCombinerInfo &DCI,
9942  const AArch64Subtarget *Subtarget) {
9943  SelectionDAG &DAG = DCI.DAG;
9944  unsigned IID = getIntrinsicID(N);
9945  switch (IID) {
9946  default:
9947  break;
9948  case Intrinsic::aarch64_neon_vcvtfxs2fp:
9949  case Intrinsic::aarch64_neon_vcvtfxu2fp:
9950  return tryCombineFixedPointConvert(N, DCI, DAG);
9951  case Intrinsic::aarch64_neon_saddv:
9952  return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
9953  case Intrinsic::aarch64_neon_uaddv:
9954  return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
9955  case Intrinsic::aarch64_neon_sminv:
9956  return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
9957  case Intrinsic::aarch64_neon_uminv:
9958  return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
9959  case Intrinsic::aarch64_neon_smaxv:
9960  return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
9961  case Intrinsic::aarch64_neon_umaxv:
9962  return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
9963  case Intrinsic::aarch64_neon_fmax:
9964  return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
9965  N->getOperand(1), N->getOperand(2));
9966  case Intrinsic::aarch64_neon_fmin:
9967  return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
9968  N->getOperand(1), N->getOperand(2));
9969  case Intrinsic::aarch64_neon_fmaxnm:
9970  return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
9971  N->getOperand(1), N->getOperand(2));
9972  case Intrinsic::aarch64_neon_fminnm:
9973  return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
9974  N->getOperand(1), N->getOperand(2));
9975  case Intrinsic::aarch64_neon_smull:
9976  case Intrinsic::aarch64_neon_umull:
9977  case Intrinsic::aarch64_neon_pmull:
9978  case Intrinsic::aarch64_neon_sqdmull:
9979  return tryCombineLongOpWithDup(IID, N, DCI, DAG);
9980  case Intrinsic::aarch64_neon_sqshl:
9981  case Intrinsic::aarch64_neon_uqshl:
9982  case Intrinsic::aarch64_neon_srshl:
9983  case Intrinsic::aarch64_neon_urshl:
9984  case Intrinsic::aarch64_neon_sqshlu:
9985  return tryCombineShiftImm(IID, N, DAG);
9986  case Intrinsic::aarch64_crc32b:
9987  case Intrinsic::aarch64_crc32cb:
9988  return tryCombineCRC32(0xff, N, DAG);
9989  case Intrinsic::aarch64_crc32h:
9990  case Intrinsic::aarch64_crc32ch:
9991  return tryCombineCRC32(0xffff, N, DAG);
9992  }
9993  return SDValue();
9994 }
9995 
9996 static SDValue performExtendCombine(SDNode *N,
9997  TargetLowering::DAGCombinerInfo &DCI,
9998  SelectionDAG &DAG) {
9999  // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
10000  // we can convert that DUP into another extract_high (of a bigger DUP), which
10001  // helps the backend to decide that an sabdl2 would be useful, saving a real
10002  // extract_high operation.
10003  if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
10004  N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
10005  SDNode *ABDNode = N->getOperand(0).getNode();
10006  unsigned IID = getIntrinsicID(ABDNode);
10007  if (IID == Intrinsic::aarch64_neon_sabd ||
10008  IID == Intrinsic::aarch64_neon_uabd) {
10009  SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG);
10010  if (!NewABD.getNode())
10011  return SDValue();
10012 
10013  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
10014  NewABD);
10015  }
10016  }
10017 
10018  // This is effectively a custom type legalization for AArch64.
10019  //
10020  // Type legalization will split an extend of a small, legal, type to a larger
10021  // illegal type by first splitting the destination type, often creating
10022  // illegal source types, which then get legalized in isel-confusing ways,
10023  // leading to really terrible codegen. E.g.,
10024  // %result = v8i32 sext v8i8 %value
10025  // becomes
10026  // %losrc = extract_subreg %value, ...
10027  // %hisrc = extract_subreg %value, ...
10028  // %lo = v4i32 sext v4i8 %losrc
10029  // %hi = v4i32 sext v4i8 %hisrc
10030  // Things go rapidly downhill from there.
10031  //
10032  // For AArch64, the [sz]ext vector instructions can only go up one element
10033  // size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32
10034  // take two instructions.
10035  //
10036  // This implies that the most efficient way to do the extend from v8i8
10037  // to two v4i32 values is to first extend the v8i8 to v8i16, then let
10038  // the normal splitting happen for the v8i16->v8i32.
10039 
10040  // This is pre-legalization to catch some cases where the default
10041  // type legalization will create ill-tempered code.
10042  if (!DCI.isBeforeLegalizeOps())
10043  return SDValue();
10044 
10045  // We're only interested in cleaning things up for non-legal vector types
10046  // here. If both the source and destination are legal, things will just
10047  // work naturally without any fiddling.
10048  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10049  EVT ResVT = N->getValueType(0);
10050  if (!ResVT.isVector() || TLI.isTypeLegal(ResVT))
10051  return SDValue();
10052  // If the vector type isn't a simple VT, it's beyond the scope of what
10053  // we're worried about here. Let legalization do its thing and hope for
10054  // the best.
10055  SDValue Src = N->getOperand(0);
10056  EVT SrcVT = Src->getValueType(0);
10057  if (!ResVT.isSimple() || !SrcVT.isSimple())
10058  return SDValue();
10059 
10060  // If the source VT is a 64-bit vector, we can play games and get the
10061  // better results we want.
10062  if (SrcVT.getSizeInBits() != 64)
10063  return SDValue();
10064 
10065  unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
10066  unsigned ElementCount = SrcVT.getVectorNumElements();
10067  SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), ElementCount);
10068  SDLoc DL(N);
10069  Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src);
10070 
10071  // Now split the rest of the operation into two halves, each with a 64
10072  // bit source.
10073  EVT LoVT, HiVT;
10074  SDValue Lo, Hi;
10075  unsigned NumElements = ResVT.getVectorNumElements();
10076  assert(!(NumElements & 1) && "Splitting vector, but not in half!");
10077  LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
10078  ResVT.getVectorElementType(), NumElements / 2);
10079 
10080  EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
10081  LoVT.getVectorNumElements());
10082  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
10083  DAG.getConstant(0, DL, MVT::i64));
10084  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
10085  DAG.getConstant(InNVT.getVectorNumElements(), DL, MVT::i64));
10086  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
10087  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
10088 
10089  // Now combine the parts back together so we still have a single result
10090  // like the combiner expects.
10091  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
10092 }
10093 
10094 static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
10095  SDValue SplatVal, unsigned NumVecElts) {
10096  assert(!St.isTruncatingStore() && "cannot split truncating vector store");
10097  unsigned OrigAlignment = St.getAlignment();
10098  unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
10099 
10100  // Create scalar stores. This is at least as good as the code sequence for a
10101  // split unaligned store which is a dup.s, ext.b, and two stores.
10102  // Most of the time the three stores should be replaced by store pair
10103  // instructions (stp).
10104  SDLoc DL(&St);
10105  SDValue BasePtr = St.getBasePtr();
10106  uint64_t BaseOffset = 0;
10107 
10108  const MachinePointerInfo &PtrInfo = St.getPointerInfo();
10109  SDValue NewST1 =
10110  DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
10111  OrigAlignment, St.getMemOperand()->getFlags());
10112 
10113  // As this is in ISel, we will not merge this add, which may degrade results.
10114  if (BasePtr->getOpcode() == ISD::ADD &&
10115  isa<ConstantSDNode>(BasePtr->getOperand(1))) {
10116  BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
10117  BasePtr = BasePtr->getOperand(0);
10118  }
10119 
10120  unsigned Offset = EltOffset;
10121  while (--NumVecElts) {
10122  unsigned Alignment = MinAlign(OrigAlignment, Offset);
10123  SDValue OffsetPtr =
10124  DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
10125  DAG.getConstant(BaseOffset + Offset, DL, MVT::i64));
10126  NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
10127  PtrInfo.getWithOffset(Offset), Alignment,
10128  St.getMemOperand()->getFlags());
10129  Offset += EltOffset;
10130  }
10131  return NewST1;
10132 }
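// Illustrative sketch (hypothetical pointers): splitting a v2i64 splat store
// of %val to %p produces two scalar stores at offsets 0 and 8, which the
// load/store optimizer is then expected to fuse into a single
//   stp xN, xN, [xM]
// rather than materializing the splat in a q register first.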
10133 
10134 /// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR. The
10135 /// load store optimizer pass will merge them to store pair stores. This should
10136 /// be better than a movi to create the vector zero followed by a vector store
10137 /// if the zero constant is not re-used, since one instruction and one register
10138 /// live range will be removed.
10139 ///
10140 /// For example, the final generated code should be:
10141 ///
10142 /// stp xzr, xzr, [x0]
10143 ///
10144 /// instead of:
10145 ///
10146 /// movi v0.2d, #0
10147 /// str q0, [x0]
10148 ///
10149 static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
10150  SDValue StVal = St.getValue();
10151  EVT VT = StVal.getValueType();
10152 
10153  // It is beneficial to scalarize a zero splat store for 2 or 3 i64 elements or
10154  // 2, 3 or 4 i32 elements.
10155  int NumVecElts = VT.getVectorNumElements();
10156  if (!(((NumVecElts == 2 || NumVecElts == 3) &&
10157  VT.getVectorElementType().getSizeInBits() == 64) ||
10158  ((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
10159  VT.getVectorElementType().getSizeInBits() == 32)))
10160  return SDValue();
10161 
10162  if (StVal.getOpcode() != ISD::BUILD_VECTOR)
10163  return SDValue();
10164 
10165  // If the zero constant has more than one use then the vector store could be
10166  // better since the constant mov will be amortized and stp q instructions
10167  // should be able to be formed.
10168  if (!StVal.hasOneUse())
10169  return SDValue();
10170 
10171  // If the store is truncating then it's going down to i16 or smaller, which
10172  // means it can be implemented in a single store anyway.
10173  if (St.isTruncatingStore())
10174  return SDValue();
10175 
10176  // If the immediate offset of the address operand is too large for the stp
10177  // instruction, then bail out.
10178  if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
10179  int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
10180  if (Offset < -512 || Offset > 504)
10181  return SDValue();
10182  }
10183 
10184  for (int I = 0; I < NumVecElts; ++I) {
10185  SDValue EltVal = StVal.getOperand(I);
10186  if (!isNullConstant(EltVal) && !isNullFPConstant(EltVal))
10187  return SDValue();
10188  }
10189 
10190  // Use a CopyFromReg WZR/XZR here to prevent
10191  // DAGCombiner::MergeConsecutiveStores from undoing this transformation.
10192  SDLoc DL(&St);
10193  unsigned ZeroReg;
10194  EVT ZeroVT;
10195  if (VT.getVectorElementType().getSizeInBits() == 32) {
10196  ZeroReg = AArch64::WZR;
10197  ZeroVT = MVT::i32;
10198  } else {
10199  ZeroReg = AArch64::XZR;
10200  ZeroVT = MVT::i64;
10201  }
10202  SDValue SplatVal =
10203  DAG.getCopyFromReg(DAG.getEntryNode(), DL, ZeroReg, ZeroVT);
10204  return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
10205 }
10206 
10207 /// Replace a splat of a scalar to a vector store by scalar stores of the scalar
10208 /// value. The load store optimizer pass will merge them to store pair stores.
10209 /// This has better performance than a splat of the scalar followed by a split
10210 /// vector store. Even if the stores are not merged it is four stores vs a dup,
10211 /// followed by an ext.b and two stores.
10212 static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
10213  SDValue StVal = St.getValue();
10214  EVT VT = StVal.getValueType();
10215 
10216  // Don't replace floating point stores, they possibly won't be transformed to
10217  // stp because of the store pair suppress pass.
10218  if (VT.isFloatingPoint())
10219  return SDValue();
10220 
10221  // We can express a splat as store pair(s) for 2 or 4 elements.
10222  unsigned NumVecElts = VT.getVectorNumElements();
10223  if (NumVecElts != 4 && NumVecElts != 2)
10224  return SDValue();
10225 
10226  // If the store is truncating then it's going down to i16 or smaller, which
10227  // means it can be implemented in a single store anyway.
10228  if (St.isTruncatingStore())
10229  return SDValue();
10230 
10231  // Check that this is a splat.
10232  // Make sure that each of the relevant vector element locations are inserted
10233  // to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
10234  std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
10235  SDValue SplatVal;
10236  for (unsigned I = 0; I < NumVecElts; ++I) {
10237  // Check for insert vector elements.
10238  if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
10239  return SDValue();
10240 
10241  // Check that same value is inserted at each vector element.
10242  if (I == 0)
10243  SplatVal = StVal.getOperand(1);
10244  else if (StVal.getOperand(1) != SplatVal)
10245  return SDValue();
10246 
10247  // Check insert element index.
10248  ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
10249  if (!CIndex)
10250  return SDValue();
10251  uint64_t IndexVal = CIndex->getZExtValue();
10252  if (IndexVal >= NumVecElts)
10253  return SDValue();
10254  IndexNotInserted.reset(IndexVal);
10255 
10256  StVal = StVal.getOperand(0);
10257  }
10258  // Check that all vector element locations were inserted to.
10259  if (IndexNotInserted.any())
10260  return SDValue();
10261 
10262  return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
10263 }
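// Illustrative sketch (hypothetical IR): a v4i32 store whose value is built
// by inserting the same scalar %s into lanes 0..3 becomes four scalar stores
// of %s at offsets 0, 4, 8 and 12; those are expected to pair up into two
// stp instructions, avoiding the dup/str q sequence.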
10264 
10265 static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
10266  SelectionDAG &DAG,
10267  const AArch64Subtarget *Subtarget) {
10268 
10269  StoreSDNode *S = cast<StoreSDNode>(N);
10270  if (S->isVolatile() || S->isIndexed())
10271  return SDValue();
10272 
10273  SDValue StVal = S->getValue();
10274  EVT VT = StVal.getValueType();
10275  if (!VT.isVector())
10276  return SDValue();
10277 
10278  // If we get a splat of zeros, convert this vector store to a store of
10279  // scalars. They will be merged into store pairs of xzr thereby removing one
10280  // instruction and one register.
10281  if (SDValue ReplacedZeroSplat = replaceZeroVectorStore(DAG, *S))
10282  return ReplacedZeroSplat;
10283 
10284  // FIXME: The logic for deciding if an unaligned store should be split should
10285  // be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
10286  // a call to that function here.
10287 
10288  if (!Subtarget->isMisaligned128StoreSlow())
10289  return SDValue();
10290 
10291  // Don't split at -Oz.
10292  if (DAG.getMachineFunction().getFunction().optForMinSize())
10293  return SDValue();
10294 
10295  // Don't split v2i64 vectors. Memcpy lowering produces those and splitting
10296  // those up regresses performance on micro-benchmarks and olden/bh.
10297  if (VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
10298  return SDValue();
10299 
10300  // Split unaligned 16B stores. They are terrible for performance.
10301  // Don't split stores with alignment of 1 or 2. Code that uses clang vector
10302  // extensions can use this to mark that it does not want splitting to happen
10303  // (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
10304  // eliminating alignment hazards is only 1 in 8 for alignment of 2.
10305  if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 ||
10306  S->getAlignment() <= 2)
10307  return SDValue();
10308 
10309  // If we get a splat of a scalar convert this vector store to a store of
10310  // scalars. They will be merged into store pairs thereby removing two
10311  // instructions.
10312  if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, *S))
10313  return ReplacedSplat;
10314 
10315  SDLoc DL(S);
10316  unsigned NumElts = VT.getVectorNumElements() / 2;
10317  // Split VT into two.
10318  EVT HalfVT =
10319  EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts);
10320  SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
10321  DAG.getConstant(0, DL, MVT::i64));
10322  SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
10323  DAG.getConstant(NumElts, DL, MVT::i64));
10324  SDValue BasePtr = S->getBasePtr();
10325  SDValue NewST1 =
10326  DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
10327  S->getAlignment(), S->getMemOperand()->getFlags());
10328  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
10329  DAG.getConstant(8, DL, MVT::i64));
10330  return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
10331  S->getPointerInfo(), S->getAlignment(),
10332  S->getMemOperand()->getFlags());
10333 }
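// Illustrative sketch (hypothetical alignment): on a subtarget where
// misaligned 128-bit stores are slow, a v16i8 store with 4-byte alignment is
// rewritten here as two 64-bit subvector stores at offsets 0 and 8, trading
// one slow unaligned q-register store for two cheaper 8-byte stores.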
10334 
10335 /// Target-specific DAG combine function for post-increment LD1 (lane) and
10336 /// post-increment LD1R.
10337 static SDValue performPostLD1Combine(SDNode *N,
10338  TargetLowering::DAGCombinerInfo &DCI,
10339  bool IsLaneOp) {
10340  if (DCI.isBeforeLegalizeOps())
10341  return SDValue();
10342 
10343  SelectionDAG &DAG = DCI.DAG;
10344  EVT VT = N->getValueType(0);
10345 
10346  unsigned LoadIdx = IsLaneOp ? 1 : 0;
10347  SDNode *LD = N->getOperand(LoadIdx).getNode();
10348  // If it is not a LOAD, we cannot do this combine.
10349  if (LD->getOpcode() != ISD::LOAD)
10350  return SDValue();
10351 
10352  // The vector lane must be a constant in the LD1LANE opcode.
10353  SDValue Lane;
10354  if (IsLaneOp) {
10355  Lane = N->getOperand(2);
10356  auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
10357  if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
10358  return SDValue();
10359  }
10360 
10361  LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
10362  EVT MemVT = LoadSDN->getMemoryVT();
10363  // Check if memory operand is the same type as the vector element.
10364  if (MemVT != VT.getVectorElementType())
10365  return SDValue();
10366 
10367  // Check if there are other uses. If so, do not combine as it will introduce
10368  // an extra load.
10369  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
10370  ++UI) {
10371  if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result.
10372  continue;
10373  if (*UI != N)
10374  return SDValue();
10375  }
10376 
10377  SDValue Addr = LD->getOperand(1);
10378  SDValue Vector = N->getOperand(0);
10379  // Search for a use of the address operand that is an increment.
10380  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
10381  Addr.getNode()->use_end(); UI != UE; ++UI) {
10382  SDNode *User = *UI;
10383  if (User->getOpcode() != ISD::ADD
10384  || UI.getUse().getResNo() != Addr.getResNo())
10385  continue;
10386 
10387  // If the increment is a constant, it must match the memory ref size.
10388  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
10389  if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
10390  uint32_t IncVal = CInc->getZExtValue();
10391  unsigned NumBytes = VT.getScalarSizeInBits() / 8;
10392  if (IncVal != NumBytes)
10393  continue;
10394  Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
10395  }
10396 
10397  // To avoid cycle construction make sure that neither the load nor the add
10398  // are predecessors to each other or the Vector.
10399  SmallPtrSet<const SDNode *, 32> Visited;
10400  SmallVector<const SDNode *, 16> Worklist;
10401  Visited.insert(N);
10402  Worklist.push_back(User);
10403  Worklist.push_back(LD);
10404  Worklist.push_back(Vector.getNode());
10405  if (SDNode::hasPredecessorHelper(LD, Visited, Worklist) ||
10406  SDNode::hasPredecessorHelper(User, Visited, Worklist))
10407  continue;
10408 
10409  SmallVector<SDValue, 8> Ops;
10410  Ops.push_back(LD->getOperand(0)); // Chain
10411  if (IsLaneOp) {
10412  Ops.push_back(Vector); // The vector to be inserted
10413  Ops.push_back(Lane); // The lane to be inserted in the vector
10414  }
10415  Ops.push_back(Addr);
10416  Ops.push_back(Inc);
10417 
10418  EVT Tys[3] = { VT, MVT::i64, MVT::Other };
10419  SDVTList SDTys = DAG.getVTList(Tys);
10420  unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
10421  SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
10422  MemVT,
10423  LoadSDN->getMemOperand());
10424 
10425  // Update the uses.
10426  SDValue NewResults[] = {
10427  SDValue(LD, 0), // The result of load
10428  SDValue(UpdN.getNode(), 2) // Chain
10429  };
10430  DCI.CombineTo(LD, NewResults);
10431  DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
10432  DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
10433 
10434  break;
10435  }
10436  return SDValue();
10437 }
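// Illustrative sketch (hypothetical IR): for a pattern like
//   %s   = load i32, i32* %p
//   %vec = insertelement <4 x i32> %acc, i32 %s, i32 1
//   %p2  = getelementptr i32, i32* %p, i64 1
// the load, the lane insert and the matching 4-byte increment of %p are
// merged into one post-incremented LD1LANEpost node, roughly
//   ld1 { v0.s }[1], [x0], #4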
10438 
10439 /// Simplify ``Addr`` given that the top byte of it is ignored by HW during
10440 /// address translation.
10441 static bool performTBISimplification(SDValue Addr,
10442  TargetLowering::DAGCombinerInfo &DCI,
10443  SelectionDAG &DAG) {
10444  APInt DemandedMask = APInt::getLowBitsSet(64, 56);
10445  KnownBits Known;
10446  TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
10447  !DCI.isBeforeLegalizeOps());
10448  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10449  if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) {
10450  DCI.CommitTargetLoweringOpt(TLO);
10451  return true;
10452  }
10453  return false;
10454 }
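// Illustrative sketch: with top-byte-ignore, an address such as
//   %tagged = or i64 %ptr, 0xff00000000000000   ; tag in bits 63:56
// feeding a load or store only has bits 55:0 demanded, so
// SimplifyDemandedBits can strip the tagging operation; the hardware ignores
// the top byte during address translation regardless.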
10455 
10456 static SDValue performSTORECombine(SDNode *N,
10457  TargetLowering::DAGCombinerInfo &DCI,
10458  SelectionDAG &DAG,
10459  const AArch64Subtarget *Subtarget) {
10460  if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
10461  return Split;
10462 
10463  if (Subtarget->supportsAddressTopByteIgnored() &&
10464  performTBISimplification(N->getOperand(2), DCI, DAG))
10465  return SDValue(N, 0);
10466 
10467  return SDValue();
10468 }
10469 
10470 
10471 /// Target-specific DAG combine function for NEON load/store intrinsics
10472 /// to merge base address updates.
10473 static SDValue performNEONPostLDSTCombine(SDNode *N,
10474  TargetLowering::DAGCombinerInfo &DCI,
10475  SelectionDAG &DAG) {
10476  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
10477  return SDValue();
10478 
10479  unsigned AddrOpIdx = N->getNumOperands() - 1;
10480  SDValue Addr = N->getOperand(AddrOpIdx);
10481 
10482  // Search for a use of the address operand that is an increment.
10483  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
10484  UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
10485  SDNode *User = *UI;
10486  if (User->getOpcode() != ISD::ADD ||
10487  UI.getUse().getResNo() != Addr.getResNo())
10488  continue;
10489 
10490  // Check that the add is independent of the load/store. Otherwise, folding
10491  // it would create a cycle.
10492  SmallPtrSet<const SDNode *, 32> Visited;
10493  SmallVector<const SDNode *, 16> Worklist;
10494  Visited.insert(Addr.getNode());
10495  Worklist.push_back(N);
10496  Worklist.push_back(User);
10497  if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
10498  SDNode::hasPredecessorHelper(User, Visited, Worklist))
10499  continue;
10500 
10501  // Find the new opcode for the updating load/store.
10502  bool IsStore = false;
10503  bool IsLaneOp = false;
10504  bool IsDupOp = false;
10505  unsigned NewOpc = 0;
10506  unsigned NumVecs = 0;
10507  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
10508  switch (IntNo) {
10509  default: llvm_unreachable("unexpected intrinsic for Neon base update");
10510  case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
10511  NumVecs = 2; break;
10512  case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
10513  NumVecs = 3; break;
10514  case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
10515  NumVecs = 4; break;
10516  case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
10517  NumVecs = 2; IsStore = true; break;
10518  case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
10519  NumVecs = 3; IsStore = true; break;
10520  case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
10521  NumVecs = 4; IsStore = true; break;
10522  case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
10523  NumVecs = 2; break;
10524  case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
10525  NumVecs = 3; break;
10526  case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
10527  NumVecs = 4; break;
10528  case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
10529  NumVecs = 2; IsStore = true; break;
10530  case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
10531  NumVecs = 3; IsStore = true; break;
10532  case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
10533  NumVecs = 4; IsStore = true; break;
10534  case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
10535  NumVecs = 2; IsDupOp = true; break;
10536  case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
10537  NumVecs = 3; IsDupOp = true; break;
10538  case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
10539  NumVecs = 4; IsDupOp = true; break;
10540  case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
10541  NumVecs = 2; IsLaneOp = true; break;
10542  case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
10543  NumVecs = 3; IsLaneOp = true; break;
10544  case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
10545  NumVecs = 4; IsLaneOp = true; break;
10546  case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
10547  NumVecs = 2; IsStore = true; IsLaneOp = true; break;
10548  case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
10549  NumVecs = 3; IsStore = true; IsLaneOp = true; break;
10550  case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
10551  NumVecs = 4; IsStore = true; IsLaneOp = true; break;
10552  }
10553 
10554  EVT VecTy;
10555  if (IsStore)
10556  VecTy = N->getOperand(2).getValueType();
10557  else
10558  VecTy = N->getValueType(0);
10559 
10560  // If the increment is a constant, it must match the memory ref size.
10561  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
10562  if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
10563  uint32_t IncVal = CInc->getZExtValue();
10564  unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
10565  if (IsLaneOp || IsDupOp)
10566  NumBytes /= VecTy.getVectorNumElements();
10567  if (IncVal != NumBytes)
10568  continue;
10569  Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
10570  }
10571  SmallVector<SDValue, 8> Ops;
10572  Ops.push_back(N->getOperand(0)); // Incoming chain
10573  // Load lane and store have vector list as input.
10574  if (IsLaneOp || IsStore)
10575  for (unsigned i = 2; i < AddrOpIdx; ++i)
10576  Ops.push_back(N->getOperand(i));
10577  Ops.push_back(Addr); // Base register
10578  Ops.push_back(Inc);
10579 
10580  // Return Types.
10581  EVT Tys[6];
10582  unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
10583  unsigned n;
10584  for (n = 0; n < NumResultVecs; ++n)
10585  Tys[n] = VecTy;
10586  Tys[n++] = MVT::i64; // Type of write back register
10587  Tys[n] = MVT::Other; // Type of the chain
10588  SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
10589 
10590  MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
10591  SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
10592  MemInt->getMemoryVT(),
10593  MemInt->getMemOperand());
10594 
10595  // Update the uses.
10596  std::vector<SDValue> NewResults;
10597  for (unsigned i = 0; i < NumResultVecs; ++i) {
10598  NewResults.push_back(SDValue(UpdN.getNode(), i));
10599  }
10600  NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
10601  DCI.CombineTo(N, NewResults);
10602  DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
10603 
10604  break;
10605  }
10606  return SDValue();
10607 }
10608 
10609 // Checks to see if the value is the prescribed width and returns information
10610 // about its extension mode.
10611 static
10612 bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
10613  ExtType = ISD::NON_EXTLOAD;
10614  switch(V.getNode()->getOpcode()) {
10615  default:
10616  return false;
10617  case ISD::LOAD: {
10618  LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
10619  if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
10620  || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
10621  ExtType = LoadNode->getExtensionType();
10622  return true;
10623  }
10624  return false;
10625  }
10626  case ISD::AssertSext: {
10627  VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
10628  if ((TypeNode->getVT() == MVT::i8 && width == 8)
10629  || (TypeNode->getVT() == MVT::i16 && width == 16)) {
10630  ExtType = ISD::SEXTLOAD;
10631  return true;
10632  }
10633  return false;
10634  }
10635  case ISD::AssertZext: {
10636  VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
10637  if ((TypeNode->getVT() == MVT::i8 && width == 8)
10638  || (TypeNode->getVT() == MVT::i16 && width == 16)) {
10639  ExtType = ISD::ZEXTLOAD;
10640  return true;
10641  }
10642  return false;
10643  }
10644  case ISD::Constant:
10645  case ISD::TargetConstant: {
10646  return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
10647  1LL << (width - 1);
10648  }
10649  }
10650 
10651  return true;
10652 }
10653 
10654 // This function does a whole lot of voodoo to determine if the tests are
10655 // equivalent without and with a mask. Essentially what happens is that given a
10656 // DAG resembling:
10657 //
10658 //     +-------------+ +-------------+ +-------------+ +-------------+
10659 //     |    Input    | | AddConstant | | CompConstant| |     CC      |
10660 //     +-------------+ +-------------+ +-------------+ +-------------+
10661 //              |              |           |              |
10662 //              V              V           |    +----------+
10663 //            +-------------+  +----+      |    |
10664 //            |     ADD     |  |0xff|      |    |
10665 //            +-------------+  +----+      |    |
10666 //                     |           |       |    |
10667 //                     V           V       |    |
10668 //                   +-------------+       |    |
10669 //                   |     AND     |       |    |
10670 //                   +-------------+       |    |
10671 //                          |              |    |
10672 //                          +-----+        |    |
10673 //                                |        |    |
10674 //                                V        V    V
10675 //                               +-------------+
10676 //                               |     CMP     |
10677 //                               +-------------+
10678 //
10679 // The AND node may be safely removed for some combinations of inputs. In
10680 // particular we need to take into account the extension type of the Input,
10681 // the exact values of AddConstant, CompConstant, and CC, along with the nominal
10682 // width of the input (this can work for any width of input; the above graph is
10683 // specific to 8 bits).
10684 //
10685 // The specific equations were worked out by generating output tables for each
10686 // AArch64CC value in terms of AddConstant (w1) and CompConstant (w2). The
10687 // problem was simplified by working with 4 bit inputs, which means we only
10688 // needed to reason about 24 distinct bit patterns: 8 patterns unique to zero
10689 // extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8
10690 // patterns present in both extensions (0,7). For every distinct set of
10691 // AddConstant and CompConstants bit patterns we can consider the masked and
10692 // unmasked versions to be equivalent if the result of this function is true for
10693 // all 16 distinct bit patterns for the current extension type of Input (w0).
10694 //
10695 // sub w8, w0, w1
10696 // and w10, w8, #0x0f
10697 // cmp w8, w2
10698 // cset w9, AArch64CC
10699 // cmp w10, w2
10700 // cset w11, AArch64CC
10701 // cmp w9, w11
10702 // cset w0, eq
10703 // ret
10704 //
10705 // Since the above function shows when the outputs are equivalent it defines
10706 // when it is safe to remove the AND. Unfortunately it only runs on AArch64 and
10707 // would be expensive to run during compiles. The equations below were written
10708 // in a test harness that confirmed they gave equivalent outputs to the above
10709 // function for all inputs, so they can be used to determine if the removal is
10710 // legal instead.
10711 //
10712 // isEquivalentMaskless() is the code for testing if the AND can be removed
10713 // factored out of the DAG recognition as the DAG can take several forms.
10714 
10715 static bool isEquivalentMaskless(unsigned CC, unsigned width,
10716  ISD::LoadExtType ExtType, int AddConstant,
10717  int CompConstant) {
10718  // By being careful about our equations and only writing them in terms of
10719  // symbolic values and well known constants (0, 1, -1, MaxUInt) we can
10720  // make them generally applicable to all bit widths.
10721  int MaxUInt = (1 << width);
10722 
10723  // For the purposes of these comparisons sign extending the type is
10724  // equivalent to zero extending the add and displacing it by half the integer
10725  // width. Provided we are careful and make sure our equations are valid over
10726  // the whole range we can just adjust the input and avoid writing equations
10727  // for sign extended inputs.
10728  if (ExtType == ISD::SEXTLOAD)
10729  AddConstant -= (1 << (width-1));
10730 
10731  switch(CC) {
10732  case AArch64CC::LE:
10733  case AArch64CC::GT:
10734  if ((AddConstant == 0) ||
10735  (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
10736  (AddConstant >= 0 && CompConstant < 0) ||
10737  (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
10738  return true;
10739  break;
10740  case AArch64CC::LT:
10741  case AArch64CC::GE:
10742  if ((AddConstant == 0) ||
10743  (AddConstant >= 0 && CompConstant <= 0) ||
10744  (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
10745  return true;
10746  break;
10747  case AArch64CC::HI:
10748  case AArch64CC::LS:
10749  if ((AddConstant >= 0 && CompConstant < 0) ||
10750  (AddConstant <= 0 && CompConstant >= -1 &&
10751  CompConstant < AddConstant + MaxUInt))
10752  return true;
10753  break;
10754  case AArch64CC::PL:
10755  case AArch64CC::MI:
10756  if ((AddConstant == 0) ||
10757  (AddConstant > 0 && CompConstant <= 0) ||
10758  (AddConstant < 0 && CompConstant <= AddConstant))
10759  return true;
10760  break;
10761  case AArch64CC::LO:
10762  case AArch64CC::HS:
10763  if ((AddConstant >= 0 && CompConstant <= 0) ||
10764  (AddConstant <= 0 && CompConstant >= 0 &&
10765  CompConstant <= AddConstant + MaxUInt))
10766  return true;
10767  break;
10768  case AArch64CC::EQ:
10769  case AArch64CC::NE:
10770  if ((AddConstant > 0 && CompConstant < 0) ||
10771  (AddConstant < 0 && CompConstant >= 0 &&
10772  CompConstant < AddConstant + MaxUInt) ||
10773  (AddConstant >= 0 && CompConstant >= 0 &&
10774  CompConstant >= AddConstant) ||
10775  (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
10776  return true;
10777  break;
10778  case AArch64CC::VS:
10779  case AArch64CC::VC:
10780  case AArch64CC::AL:
10781  case AArch64CC::NV:
10782  return true;
10783  case AArch64CC::Invalid:
10784  break;
10785  }
10786 
10787  return false;
10788 }
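// Illustrative sketch (hypothetical constants): for an i8 value zero-extended
// into a W register, testing ((x + 1) & 0xff) == 5 is equivalent to testing
// (x + 1) == 5, since x + 1 stays within [1, 256] and the only wrap case,
// x + 1 == 256, masks to 0 and compares unequal either way; this falls under
// the EQ/NE clause above (AddConstant >= 0, CompConstant >= AddConstant), so
// the AND feeding the SUBS can be removed.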
10789 
10790 static
10791 SDValue performCONDCombine(SDNode *N,
10792  TargetLowering::DAGCombinerInfo &DCI,
10793  SelectionDAG &DAG, unsigned CCIndex,
10794  unsigned CmpIndex) {
10795  unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
10796  SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
10797  unsigned CondOpcode = SubsNode->getOpcode();
10798 
10799  if (CondOpcode != AArch64ISD::SUBS)
10800  return SDValue();
10801 
10802  // There is a SUBS feeding this condition. Is it fed by a mask we can
10803  // use?
10804 
10805  SDNode *AndNode = SubsNode->getOperand(0).getNode();
10806  unsigned MaskBits = 0;
10807 
10808  if (AndNode->getOpcode() != ISD::AND)
10809  return SDValue();
10810 
10811  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
10812  uint32_t CNV = CN->getZExtValue();
10813  if (CNV == 255)
10814  MaskBits = 8;
10815  else if (CNV == 65535)
10816  MaskBits = 16;
10817  }
10818 
10819  if (!MaskBits)
10820  return SDValue();
10821 
10822  SDValue AddValue = AndNode->getOperand(0);
10823 
10824  if (AddValue.getOpcode() != ISD::ADD)
10825  return SDValue();
10826 
10827  // The basic dag structure is correct, grab the inputs and validate them.
10828 
10829  SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
10830  SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
10831  SDValue SubsInputValue = SubsNode->getOperand(1);
10832 
10833  // The mask is present and the provenance of all the values is a smaller type,
10834  // lets see if the mask is superfluous.
10835 
10836  if (!isa<ConstantSDNode>(AddInputValue2.getNode()) ||
10837  !isa<ConstantSDNode>(SubsInputValue.getNode()))
10838  return SDValue();
10839 
10840  ISD::LoadExtType ExtType;
10841 
10842  if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
10843  !checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
10844  !checkValueWidth(AddInputValue1, MaskBits, ExtType) )
10845  return SDValue();
10846 
10847  if(!isEquivalentMaskless(CC, MaskBits, ExtType,
10848  cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
10849  cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
10850  return SDValue();
10851 
10852  // The AND is not necessary, remove it.
10853 
10854  SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
10855  SubsNode->getValueType(1));
10856  SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
10857 
10858  SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
10859  DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
10860 
10861  return SDValue(N, 0);
10862 }
10863 
10864 // Optimize compare with zero and branch.
10865 static SDValue performBRCONDCombine(SDNode *N,
10866  TargetLowering::DAGCombinerInfo &DCI,
10867  SelectionDAG &DAG) {
10868  MachineFunction &MF = DAG.getMachineFunction();
10869  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
10870  // will not be produced, as they are conditional branch instructions that do
10871  // not set flags.
10872  if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
10873  return SDValue();
10874 
10875  if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
10876  N = NV.getNode();
10877  SDValue Chain = N->getOperand(0);
10878  SDValue Dest = N->getOperand(1);
10879  SDValue CCVal = N->getOperand(2);
10880  SDValue Cmp = N->getOperand(3);
10881 
10882  assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
10883  unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
10884  if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
10885  return SDValue();
10886 
10887  unsigned CmpOpc = Cmp.getOpcode();
10888  if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
10889  return SDValue();
10890 
10891  // Only attempt folding if there is only one use of the flag and no use of the
10892  // value.
10893  if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
10894  return SDValue();
10895 
10896  SDValue LHS = Cmp.getOperand(0);
10897  SDValue RHS = Cmp.getOperand(1);
10898 
10899  assert(LHS.getValueType() == RHS.getValueType() &&
10900  "Expected the value type to be the same for both operands!");
10901  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
10902  return SDValue();
10903 
10904  if (isNullConstant(LHS))
10905  std::swap(LHS, RHS);
10906 
10907  if (!isNullConstant(RHS))
10908  return SDValue();
10909 
10910  if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
10911  LHS.getOpcode() == ISD::SRL)
10912  return SDValue();
10913 
10914  // Fold the compare into the branch instruction.
10915  SDValue BR;
10916  if (CC == AArch64CC::EQ)
10917  BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
10918  else
10919  BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
10920 
10921  // Do not add new nodes to DAG combiner worklist.
10922  DCI.CombineTo(N, BR, false);
10923 
10924  return SDValue();
10925 }
10926 
10927 // Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
10928 // as well as whether the test should be inverted. This code is required to
10929 // catch these cases (as opposed to standard dag combines) because
10930 // AArch64ISD::TBZ is matched during legalization.
10931 static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
10932  SelectionDAG &DAG) {
10933 
10934  if (!Op->hasOneUse())
10935  return Op;
10936 
10937  // We don't handle undef/constant-fold cases below, as they should have
10938  // already been taken care of (e.g. and of 0, test of undefined shifted bits,
10939  // etc.)
10940 
10941  // (tbz (trunc x), b) -> (tbz x, b)
10942  // This case is just here to enable more of the below cases to be caught.
10943  if (Op->getOpcode() == ISD::TRUNCATE &&
10944  Bit < Op->getValueType(0).getSizeInBits()) {
10945  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
10946  }
10947 
10948  if (Op->getNumOperands() != 2)
10949  return Op;
10950 
10951  auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
10952  if (!C)
10953  return Op;
10954 
10955  switch (Op->getOpcode()) {
10956  default:
10957  return Op;
10958 
10959  // (tbz (and x, m), b) -> (tbz x, b)
10960  case ISD::AND:
10961  if ((C->getZExtValue() >> Bit) & 1)
10962  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
10963  return Op;
10964 
10965  // (tbz (shl x, c), b) -> (tbz x, b-c)
10966  case ISD::SHL:
10967  if (C->getZExtValue() <= Bit &&
10968  (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
10969  Bit = Bit - C->getZExtValue();
10970  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
10971  }
10972  return Op;
10973 
10974  // (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
10975  case ISD::SRA:
10976  Bit = Bit + C->getZExtValue();
10977  if (Bit >= Op->getValueType(0).getSizeInBits())
10978  Bit = Op->getValueType(0).getSizeInBits() - 1;
10979  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
10980 
10981  // (tbz (srl x, c), b) -> (tbz x, b+c)
10982  case ISD::SRL:
10983  if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
10984  Bit = Bit + C->getZExtValue();
10985  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
10986  }
10987  return Op;
10988 
10989  // (tbz (xor x, -1), b) -> (tbnz x, b)
10990  case ISD::XOR:
10991  if ((C->getZExtValue() >> Bit) & 1)
10992  Invert = !Invert;
10993  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
10994  }
10995 }
10996 
10997 // Optimize test single bit zero/non-zero and branch.
10998 static SDValue performTBZCombine(SDNode *N,
10999  TargetLowering::DAGCombinerInfo &DCI,
11000  SelectionDAG &DAG) {
11001  unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
11002  bool Invert = false;
11003  SDValue TestSrc = N->getOperand(1);
11004  SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
11005 
11006  if (TestSrc == NewTestSrc)
11007  return SDValue();
11008 
11009  unsigned NewOpc = N->getOpcode();
11010  if (Invert) {
11011  if (NewOpc == AArch64ISD::TBZ)
11012  NewOpc = AArch64ISD::TBNZ;
11013  else {
11014  assert(NewOpc == AArch64ISD::TBNZ);
11015  NewOpc = AArch64ISD::TBZ;
11016  }
11017  }
11018 
11019  SDLoc DL(N);
11020  return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
11021  DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
11022 }
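// Illustrative sketch (hypothetical operands): a test of bit 3 of (shl x, 2),
// i.e. a tbz on the shifted value, is rewritten to test bit 1 of x directly;
// an xor with -1 on the path would additionally flip TBZ into TBNZ.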
11023 
11024 // vselect (v1i1 setcc) ->
11025 // vselect (v1iXX setcc) (XX is the size of the compared operand type)
11026 // FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
11027 // condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
11028 // such VSELECT.
11029 static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
11030  SDValue N0 = N->getOperand(0);
11031  EVT CCVT = N0.getValueType();
11032 
11033  if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 ||
11034  CCVT.getVectorElementType() != MVT::i1)
11035  return SDValue();
11036 
11037  EVT ResVT = N->getValueType(0);
11038  EVT CmpVT = N0.getOperand(0).getValueType();
11039  // Only combine when the result type is of the same size as the compared
11040  // operands.
11041  if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
11042  return SDValue();
11043 
11044  SDValue IfTrue = N->getOperand(1);
11045  SDValue IfFalse = N->getOperand(2);
11046  SDValue SetCC =
11047  DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
11048  N0.getOperand(0), N0.getOperand(1),
11049  cast<CondCodeSDNode>(N0.getOperand(2))->get());
11050  return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
11051  IfTrue, IfFalse);
11052 }
11053 
11054 /// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
11055 /// the compare-mask instructions rather than going via NZCV, even if LHS and
11056 /// RHS are really scalar. This replaces any scalar setcc in the above pattern
11057 /// with a vector one followed by a DUP shuffle on the result.
11058 static SDValue performSelectCombine(SDNode *N,
11059  TargetLowering::DAGCombinerInfo &DCI) {
11060  SelectionDAG &DAG = DCI.DAG;
11061  SDValue N0 = N->getOperand(0);
11062  EVT ResVT = N->getValueType(0);
11063 
11064  if (N0.getOpcode() != ISD::SETCC)
11065  return SDValue();
11066 
11067  // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
11068  // scalar SetCCResultType. We also don't expect vectors, because we assume
11069  // that selects fed by vector SETCCs are canonicalized to VSELECT.
11070  assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
11071  "Scalar-SETCC feeding SELECT has unexpected result type!");
11072 
11073  // If NumMaskElts == 0, the comparison is larger than select result. The
11074  // largest real NEON comparison is 64-bits per lane, which means the result is
11075  // at most 32-bits and an illegal vector. Just bail out for now.
11076  EVT SrcVT = N0.getOperand(0).getValueType();
11077 
11078  // Don't try to do this optimization when the setcc itself has i1 operands.
11079  // There are no legal vectors of i1, so this would be pointless.
11080  if (SrcVT == MVT::i1)
11081  return SDValue();
11082 
11083  int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
11084  if (!ResVT.isVector() || NumMaskElts == 0)
11085  return SDValue();
11086 
11087  SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
11088  EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
11089 
11090  // Also bail out if the vector CCVT isn't the same size as ResVT.
11091  // This can happen if the SETCC operand size doesn't divide the ResVT size
11092  // (e.g., f64 vs v3f32).
11093  if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
11094  return SDValue();
11095 
11096  // Make sure we didn't create illegal types, if we're not supposed to.
11097  assert(DCI.isBeforeLegalize() ||
11098  DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
11099 
11100  // First perform a vector comparison, where lane 0 is the one we're interested
11101  // in.
11102  SDLoc DL(N0);
11103  SDValue LHS =
11104  DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
11105  SDValue RHS =
11106  DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
11107  SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
11108 
11109  // Now duplicate the comparison mask we want across all other lanes.
11110  SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
11111  SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask);
11112  Mask = DAG.getNode(ISD::BITCAST, DL,
11113  ResVT.changeVectorElementTypeToInteger(), Mask);
11114 
11115  return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
11116 }
11117 
11118 /// Get rid of unnecessary NVCASTs (that don't change the type).
11119 static SDValue performNVCASTCombine(SDNode *N) {
11120  if (N->getValueType(0) == N->getOperand(0).getValueType())
11121  return N->getOperand(0);
11122 
11123  return SDValue();
11124 }
11125 
11126 // If all users of the globaladdr are of the form (globaladdr + constant), find
11127 // the smallest constant, fold it into the globaladdr's offset and rewrite the
11128 // globaladdr as (globaladdr + constant) - constant.
11129 static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
11130  const AArch64Subtarget *Subtarget,
11131  const TargetMachine &TM) {
11132  auto *GN = cast<GlobalAddressSDNode>(N);
11133  if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
11134  AArch64II::MO_NO_FLAG)
11135  return SDValue();
11136 
11137  uint64_t MinOffset = -1ull;
11138  for (SDNode *N : GN->uses()) {
11139  if (N->getOpcode() != ISD::ADD)
11140  return SDValue();
11141  auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
11142  if (!C)
11143  C = dyn_cast<ConstantSDNode>(N->getOperand(1));
11144  if (!C)
11145  return SDValue();
11146  MinOffset = std::min(MinOffset, C->getZExtValue());
11147  }
11148  uint64_t Offset = MinOffset + GN->getOffset();
11149 
11150  // Require that the new offset is larger than the existing one. Otherwise, we
11151  // can end up oscillating between two possible DAGs, for example,
11152  // (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
11153  if (Offset <= uint64_t(GN->getOffset()))
11154  return SDValue();
11155 
11156  // Check whether folding this offset is legal. It must not go out of bounds of
11157  // the referenced object to avoid violating the code model, and must be
11158  // smaller than 2^21 because this is the largest offset expressible in all
11159  // object formats.
11160  //
11161  // This check also prevents us from folding negative offsets, which will end
11162  // up being treated in the same way as large positive ones. They could also
11163  // cause code model violations, and aren't really common enough to matter.
11164  if (Offset >= (1 << 21))
11165  return SDValue();
11166 
11167  const GlobalValue *GV = GN->getGlobal();
11168  Type *T = GV->getValueType();
11169  if (!T->isSized() ||
11170  Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
11171  return SDValue();
11172 
11173  SDLoc DL(GN);
11174  SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
11175  return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
11176  DAG.getConstant(MinOffset, DL, MVT::i64));
11177 }
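// Illustrative sketch (hypothetical offsets): if the only uses of @g are
// (add @g, 16) and (add @g, 24), the node is rewritten as ((@g + 16) - 16);
// once the adds fold, the adrp/:lo12: pair materializes @g + 16 directly and
// the second use only needs a "+ 8" on top of it.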
11178 
11179 SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
11180  DAGCombinerInfo &DCI) const {
11181  SelectionDAG &DAG = DCI.DAG;
11182  switch (N->getOpcode()) {
11183  default:
11184  LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
11185  break;
11186  case ISD::ADD:
11187  case ISD::SUB:
11188  return performAddSubLongCombine(N, DCI, DAG);
11189  case ISD::XOR:
11190  return performXorCombine(N, DAG, DCI, Subtarget);
11191  case ISD::MUL:
11192  return performMulCombine(N, DAG, DCI, Subtarget);
11193  case ISD::SINT_TO_FP:
11194  case ISD::UINT_TO_FP:
11195  return performIntToFpCombine(N, DAG, Subtarget);
11196  case ISD::FP_TO_SINT:
11197  case ISD::FP_TO_UINT:
11198  return performFpToIntCombine(N, DAG, DCI, Subtarget);
11199  case ISD::FDIV:
11200  return performFDivCombine(N, DAG, DCI, Subtarget);
11201  case ISD::OR:
11202  return performORCombine(N, DCI, Subtarget);
11203  case ISD::SRL:
11204  return performSRLCombine(N, DCI);
11205  case ISD::INTRINSIC_WO_CHAIN:
11206  return performIntrinsicCombine(N, DCI, Subtarget);
11207  case ISD::ANY_EXTEND:
11208  case ISD::ZERO_EXTEND:
11209  case ISD::SIGN_EXTEND:
11210  return performExtendCombine(N, DCI, DAG);
11211  case ISD::BITCAST:
11212  return performBitcastCombine(N, DCI, DAG);
11213  case ISD::CONCAT_VECTORS:
11214  return performConcatVectorsCombine(N, DCI, DAG);
11215  case ISD::SELECT:
11216  return performSelectCombine(N, DCI);
11217  case ISD::VSELECT:
11218  return performVSelectCombine(N, DCI.DAG);
11219  case ISD::LOAD:
11220  if (performTBISimplification(N->getOperand(1), DCI, DAG))
11221  return SDValue(N, 0);
11222  break;
11223  case ISD::STORE:
11224  return performSTORECombine(N, DCI, DAG, Subtarget);
11225  case AArch64ISD::BRCOND:
11226  return performBRCONDCombine(N, DCI, DAG);
11227  case AArch64ISD::TBNZ:
11228  case AArch64ISD::TBZ:
11229  return performTBZCombine(N, DCI, DAG);
11230  case AArch64ISD::CSEL:
11231  return performCONDCombine(N, DCI, DAG, 2, 3);
11232  case AArch64ISD::DUP:
11233  return performPostLD1Combine(N, DCI, false);
11234  case AArch64ISD::NVCAST:
11235  return performNVCASTCombine(N);
11236  case ISD::INSERT_VECTOR_ELT:
11237  return performPostLD1Combine(N, DCI, true);
11238  case ISD::INTRINSIC_VOID:
11239  case ISD::INTRINSIC_W_CHAIN:
11240  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11241  case Intrinsic::aarch64_neon_ld2:
11242  case Intrinsic::aarch64_neon_ld3:
11243  case Intrinsic::aarch64_neon_ld4:
11244  case Intrinsic::aarch64_neon_ld1x2:
11245  case Intrinsic::aarch64_neon_ld1x3:
11246  case Intrinsic::aarch64_neon_ld1x4:
11247  case Intrinsic::aarch64_neon_ld2lane:
11248  case Intrinsic::aarch64_neon_ld3lane:
11249  case Intrinsic::aarch64_neon_ld4lane:
11250  case Intrinsic::aarch64_neon_ld2r:
11251  case Intrinsic::aarch64_neon_ld3r:
11252  case Intrinsic::aarch64_neon_ld4r:
11253  case Intrinsic::aarch64_neon_st2:
11254  case Intrinsic::aarch64_neon_st3:
11255  case Intrinsic::aarch64_neon_st4:
11256  case Intrinsic::aarch64_neon_st1x2:
11257  case Intrinsic::aarch64_neon_st1x3:
11258  case Intrinsic::aarch64_neon_st1x4:
11259  case Intrinsic::aarch64_neon_st2lane:
11260  case Intrinsic::aarch64_neon_st3lane:
11261  case Intrinsic::aarch64_neon_st4lane:
11262  return performNEONPostLDSTCombine(N, DCI, DAG);
11263  default:
11264  break;
11265  }
11266  break;
11267  case ISD::GlobalAddress:
11268  return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
11269  }
11270  return SDValue();
11271 }
11272 
11273 // Check if the return value is used as only a return value, as otherwise
11274 // we can't perform a tail-call. In particular, we need to check for
11275 // target ISD nodes that are returns and any other "odd" constructs
11276 // that the generic analysis code won't necessarily catch.
11277 bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
11278  SDValue &Chain) const {
11279  if (N->getNumValues() != 1)
11280  return false;
11281  if (!N->hasNUsesOfValue(1, 0))
11282  return false;
11283 
11284  SDValue TCChain = Chain;
11285  SDNode *Copy = *N->use_begin();
11286  if (Copy->getOpcode() == ISD::CopyToReg) {
11287  // If the copy has a glue operand, we conservatively assume it isn't safe to
11288  // perform a tail call.
11289  if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
11290  MVT::Glue)
11291  return false;
11292  TCChain = Copy->getOperand(0);
11293  } else if (Copy->getOpcode() != ISD::FP_EXTEND)
11294  return false;
11295 
11296  bool HasRet = false;
11297  for (SDNode *Node : Copy->uses()) {
11298  if (Node->getOpcode() != AArch64ISD::RET_FLAG)
11299  return false;
11300  HasRet = true;
11301  }
11302 
11303  if (!HasRet)
11304  return false;
11305 
11306  Chain = TCChain;
11307  return true;
11308 }
11309 
11310 // Return whether an instruction can potentially be optimized to a tail
11311 // call. This will cause the optimizers to attempt to move, or duplicate,
11312 // return instructions to help enable tail call optimizations for this
11313 // instruction.
11314 bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
11315  return CI->isTailCall();
11316 }
11317 
11318 bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
11319  SDValue &Offset,
11320  ISD::MemIndexedMode &AM,
11321  bool &IsInc,
11322  SelectionDAG &DAG) const {
11323  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
11324  return false;
11325 
11326  Base = Op->getOperand(0);
11327  // All of the indexed addressing mode instructions take a signed
11328  // 9 bit immediate offset.
11329  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
11330  int64_t RHSC = RHS->getSExtValue();
11331  if (Op->getOpcode() == ISD::SUB)
11332  RHSC = -(uint64_t)RHSC;
11333  if (!isInt<9>(RHSC))
11334  return false;
11335  IsInc = (Op->getOpcode() == ISD::ADD);
11336  Offset = Op->getOperand(1);
11337  return true;
11338  }
11339  return false;
11340 }
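As a quick sanity check (a sketch, not part of this file; it assumes llvm/Support/MathExtras.h is on the include path), the signed 9-bit window accepted above spans offsets -256 through +255:

#include "llvm/Support/MathExtras.h"

// Offsets representable by the pre/post-indexed forms handled above.
static_assert(llvm::isInt<9>(-256) && llvm::isInt<9>(255), "inside the window");
static_assert(!llvm::isInt<9>(-257) && !llvm::isInt<9>(256), "outside the window");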
11341 
11342 bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
11343  SDValue &Offset,
11344  ISD::MemIndexedMode &AM,
11345  SelectionDAG &DAG) const {
11346  EVT VT;
11347  SDValue Ptr;
11348  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
11349  VT = LD->getMemoryVT();
11350  Ptr = LD->getBasePtr();
11351  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
11352  VT = ST->getMemoryVT();
11353  Ptr = ST->getBasePtr();
11354  } else
11355  return false;
11356 
11357  bool IsInc;
11358  if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
11359  return false;
11360  AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
11361  return true;
11362 }
11363 
11364 bool AArch64TargetLowering::getPostIndexedAddressParts(
11365  SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset,
11366  ISD::MemIndexedMode &AM, SelectionDAG &DAG) const {
11367  EVT VT;
11368  SDValue Ptr;
11369  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
11370  VT = LD->getMemoryVT();
11371  Ptr = LD->getBasePtr();
11372  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
11373  VT = ST->getMemoryVT();
11374  Ptr = ST->getBasePtr();
11375  } else
11376  return false;
11377 
11378  bool IsInc;
11379  if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
11380  return false;
11381  // Post-indexing updates the base, so it's not a valid transform
11382  // if that's not the same as the load's pointer.
11383  if (Ptr != Base)
11384  return false;
11385  AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
11386  return true;
11387 }
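For context (illustration only), a strided walk like the one below is the typical client of the post-indexed form chosen here; in AArch64 assembly, `ldr x0, [x1], #8` performs the load and then bumps the base, while the pre-indexed `ldr x0, [x1, #8]!` bumps the base before the access.

// The load plus pointer increment in this loop is a post-indexed candidate.
long long sum(const long long *p, int n) {
  long long s = 0;
  for (int i = 0; i < n; ++i)
    s += *p++;
  return s;
}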
11388 
11389 static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
11390  SelectionDAG &DAG) {
11391  SDLoc DL(N);
11392  SDValue Op = N->getOperand(0);
11393 
11394  if (N->getValueType(0) != MVT::i16 || Op.getValueType() != MVT::f16)
11395  return;
11396 
11397  Op = SDValue(
11398  DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
11399  DAG.getUNDEF(MVT::i32), Op,
11400  DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
11401  0);
11402  Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
11403  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
11404 }
11405 
11406 static void ReplaceReductionResults(SDNode *N,
11407  SmallVectorImpl<SDValue> &Results,
11408  SelectionDAG &DAG, unsigned InterOp,
11409  unsigned AcrossOp) {
11410  EVT LoVT, HiVT;
11411  SDValue Lo, Hi;
11412  SDLoc dl(N);
11413  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
11414  std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
11415  SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
11416  SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
11417  Results.push_back(SplitVal);
11418 }
11419 
11420 static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) {
11421  SDLoc DL(N);
11422  SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N);
11423  SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
11424  DAG.getNode(ISD::SRL, DL, MVT::i128, N,
11425  DAG.getConstant(64, DL, MVT::i64)));
11426  return std::make_pair(Lo, Hi);
11427 }
11428 
11429 // Create an even/odd pair of X registers holding integer value V.
11430 static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
11431  SDLoc dl(V.getNode());
11432  SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64);
11433  SDValue VHi = DAG.getAnyExtOrTrunc(
11434  DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)),
11435  dl, MVT::i64);
11436  if (DAG.getDataLayout().isBigEndian())
11437  std::swap (VLo, VHi);
11438  SDValue RegClass =
11439  DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
11440  SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
11441  SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
11442  const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
11443  return SDValue(
11444  DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
11445 }
11446 
11447 static void ReplaceCMP_SWAP_128Results(SDNode *N,
11448  SmallVectorImpl<SDValue> &Results,
11449  SelectionDAG &DAG,
11450  const AArch64Subtarget *Subtarget) {
11451  assert(N->getValueType(0) == MVT::i128 &&
11452  "AtomicCmpSwap on types less than 128 should be legal");
11453 
11454  if (Subtarget->hasLSE()) {
11455  // LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
11456  // so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
11457  SDValue Ops[] = {
11458  createGPRPairNode(DAG, N->getOperand(2)), // Compare value
11459  createGPRPairNode(DAG, N->getOperand(3)), // Store value
11460  N->getOperand(1), // Ptr
11461  N->getOperand(0), // Chain in
11462  };
11463 
11464  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
11465 
11466  unsigned Opcode;
11467  switch (MemOp->getOrdering()) {
11468  case AtomicOrdering::Monotonic:
11469  Opcode = AArch64::CASPX;
11470  break;
11471  case AtomicOrdering::Acquire:
11472  Opcode = AArch64::CASPAX;
11473  break;
11474  case AtomicOrdering::Release:
11475  Opcode = AArch64::CASPLX;
11476  break;
11477  case AtomicOrdering::AcquireRelease:
11478  case AtomicOrdering::SequentiallyConsistent:
11479  Opcode = AArch64::CASPALX;
11480  break;
11481  default:
11482  llvm_unreachable("Unexpected ordering!");
11483  }
11484 
11485  MachineSDNode *CmpSwap = DAG.getMachineNode(
11486  Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
11487  DAG.setNodeMemRefs(CmpSwap, {MemOp});
11488 
11489  unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
11490  if (DAG.getDataLayout().isBigEndian())
11491  std::swap(SubReg1, SubReg2);
11492  Results.push_back(DAG.getTargetExtractSubreg(SubReg1, SDLoc(N), MVT::i64,
11493  SDValue(CmpSwap, 0)));
11494  Results.push_back(DAG.getTargetExtractSubreg(SubReg2, SDLoc(N), MVT::i64,
11495  SDValue(CmpSwap, 0)));
11496  Results.push_back(SDValue(CmpSwap, 1)); // Chain out
11497  return;
11498  }
11499 
11500  auto Desired = splitInt128(N->getOperand(2), DAG);
11501  auto New = splitInt128(N->getOperand(3), DAG);
11502  SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
11503  New.first, New.second, N->getOperand(0)};
11504  SDNode *CmpSwap = DAG.getMachineNode(
11505  AArch64::CMP_SWAP_128, SDLoc(N),
11506  DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other), Ops);
11507 
11508  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
11509  DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
11510 
11511  Results.push_back(SDValue(CmpSwap, 0));
11512  Results.push_back(SDValue(CmpSwap, 1));
11513  Results.push_back(SDValue(CmpSwap, 3));
11514 }
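For context, user-level code like the following sketch reaches this expansion when compiled for AArch64: with LSE it takes the CASP path above, otherwise the CMP_SWAP_128 pseudo. The helper name `cas16` is made up for the example.

#include <atomic>

// 16-byte strong compare-and-swap; lowered through ReplaceCMP_SWAP_128Results.
bool cas16(std::atomic<__int128> &A, __int128 &Expected, __int128 Desired) {
  return A.compare_exchange_strong(Expected, Desired);
}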
11515 
11516 void AArch64TargetLowering::ReplaceNodeResults(
11517  SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
11518  switch (N->getOpcode()) {
11519  default:
11520  llvm_unreachable("Don't know how to custom expand this");
11521  case ISD::BITCAST:
11522  ReplaceBITCASTResults(N, Results, DAG);
11523  return;
11524  case ISD::VECREDUCE_ADD:
11525  case ISD::VECREDUCE_SMAX:
11526  case ISD::VECREDUCE_SMIN:
11527  case ISD::VECREDUCE_UMAX:
11528  case ISD::VECREDUCE_UMIN:
11529  Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
11530  return;
11531 
11532  case AArch64ISD::SADDV:
11533  ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
11534  return;
11535  case AArch64ISD::UADDV:
11536  ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::UADDV);
11537  return;
11538  case AArch64ISD::SMINV:
11539  ReplaceReductionResults(N, Results, DAG, ISD::SMIN, AArch64ISD::SMINV);
11540  return;
11541  case AArch64ISD::UMINV:
11542  ReplaceReductionResults(N, Results, DAG, ISD::UMIN, AArch64ISD::UMINV);
11543  return;
11544  case AArch64ISD::SMAXV:
11545  ReplaceReductionResults(N, Results, DAG, ISD::SMAX, AArch64ISD::SMAXV);
11546  return;
11547  case AArch64ISD::UMAXV:
11548  ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV);
11549  return;
11550  case ISD::FP_TO_UINT:
11551  case ISD::FP_TO_SINT:
11552  assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
11553  // Let normal code take care of it by not adding anything to Results.
11554  return;
11555  case ISD::ATOMIC_CMP_SWAP:
11556  ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
11557  return;
11558  }
11559 }
11560 
11561 bool AArch64TargetLowering::useLoadStackGuardNode() const {
11562  if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
11563  return false;
11564  return true;
11565 }
11566 
11567 unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
11568  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11569  // reciprocal if there are three or more FDIVs.
11570  return 3;
11571 }
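A small illustration (not from this file) of the transform this threshold gates: with at least three divisions by the same value and reassociation allowed (e.g. -ffast-math), the combiner can pay for one division to form the reciprocal and turn the rest into multiplies.

// Eligible: three FDIVs share the divisor d, so r = 1.0f / d is formed once
// and each division becomes a multiply by r.
void scale3(float &a, float &b, float &c, float d) {
  a /= d;
  b /= d;
  c /= d;
}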
11572 
11573 TargetLoweringBase::LegalizeTypeAction
11574 AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
11575  // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
11576  // v4i16, v2i32 instead of to promote.
11577  if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
11578  VT == MVT::v1f32)
11579  return TypeWidenVector;
11580 
11581  return TargetLoweringBase::getPreferredVectorAction(VT);
11582  }
11583 
11584 // Loads and stores less than 128-bits are already atomic; ones above that
11585 // are doomed anyway, so defer to the default libcall and blame the OS when
11586 // things go wrong.
11587 bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
11588  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
11589  return Size == 128;
11590 }
11591 
11592 // Loads and stores less than 128-bits are already atomic; ones above that
11593 // are doomed anyway, so defer to the default libcall and blame the OS when
11594 // things go wrong.
11595 TargetLowering::AtomicExpansionKind
11596 AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
11597  unsigned Size = LI->getType()->getPrimitiveSizeInBits();
11598  return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
11599 }
11600 
11601 // For the real atomic operations, we have ldxr/stxr up to 128 bits,
11602 TargetLowering::AtomicExpansionKind
11603 AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
11604  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
11605  if (Size > 128) return AtomicExpansionKind::None;
11606  // Nand not supported in LSE.
11607  if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC;
11608  // Leave 128 bits to LLSC.
11609  return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC;
11610 }
11611 
11612 TargetLowering::AtomicExpansionKind
11613 AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
11614  AtomicCmpXchgInst *AI) const {
11615  // If subtarget has LSE, leave cmpxchg intact for codegen.
11616  if (Subtarget->hasLSE())
11617  return AtomicExpansionKind::None;
11618  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
11619  // implement cmpxchg without spilling. If the address being exchanged is also
11620  // on the stack and close enough to the spill slot, this can lead to a
11621  // situation where the monitor always gets cleared and the atomic operation
11622  // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
11623  if (getTargetMachine().getOptLevel() == 0)
11624  return AtomicExpansionKind::None;
11625  return AtomicExpansionKind::LLSC;
11626 }
11627 
11628 Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
11629  AtomicOrdering Ord) const {
11630  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11631  Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
11632  bool IsAcquire = isAcquireOrStronger(Ord);
11633 
11634  // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
11635  // intrinsic must return {i64, i64} and we have to recombine them into a
11636  // single i128 here.
11637  if (ValTy->getPrimitiveSizeInBits() == 128) {
11638  Intrinsic::ID Int =
11639  IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
11640  Function *Ldxr = Intrinsic::getDeclaration(M, Int);
11641 
11642  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
11643  Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
11644 
11645  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
11646  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
11647  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
11648  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
11649  return Builder.CreateOr(
11650  Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
11651  }
11652 
11653  Type *Tys[] = { Addr->getType() };
11654  Intrinsic::ID Int =
11655  IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
11656  Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
11657 
11658  return Builder.CreateTruncOrBitCast(
11659  Builder.CreateCall(Ldxr, Addr),
11660  cast<PointerType>(Addr->getType())->getElementType());
11661 }
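The recombination built above, written out as plain C++ purely for illustration (the real code emits the equivalent IR through IRBuilder):

#include <cstdint>

// ldxp/ldaxp return the low and high halves separately; they are glued back
// into a single 128-bit value as lo | (hi << 64).
static unsigned __int128 recombine(uint64_t Lo, uint64_t Hi) {
  return ((unsigned __int128)Hi << 64) | Lo;
}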
11662 
11663 void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
11664  IRBuilder<> &Builder) const {
11665  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11666  Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
11667 }
11668 
11669 Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
11670  Value *Val, Value *Addr,
11671  AtomicOrdering Ord) const {
11672  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11673  bool IsRelease = isReleaseOrStronger(Ord);
11674 
11675  // Since the intrinsics must have legal type, the i128 intrinsics take two
11676  // parameters: "i64, i64". We must marshal Val into the appropriate form
11677  // before the call.
11678  if (Val->getType()->getPrimitiveSizeInBits() == 128) {
11679  Intrinsic::ID Int =
11680  IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
11681  Function *Stxr = Intrinsic::getDeclaration(M, Int);
11682  Type *Int64Ty = Type::getInt64Ty(M->getContext());
11683 
11684  Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
11685  Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
11686  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
11687  return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
11688  }
11689 
11690  Intrinsic::ID Int =
11691  IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
11692  Type *Tys[] = { Addr->getType() };
11693  Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
11694 
11695  return Builder.CreateCall(Stxr,
11696  {Builder.CreateZExtOrBitCast(
11697  Val, Stxr->getFunctionType()->getParamType(0)),
11698  Addr});
11699 }
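And the inverse marshaling done on the store side, again only as a C++ sketch of what the emitted IR computes:

#include <cstdint>
#include <utility>

// Val is split into the two i64 operands expected by stxp/stlxp: {lo, hi}.
static std::pair<uint64_t, uint64_t> splitForStxp(unsigned __int128 Val) {
  return {(uint64_t)Val, (uint64_t)(Val >> 64)};
}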
11700 
11701 bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
11702  Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
11703  return Ty->isArrayTy();
11704 }
11705 
11706 bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
11707  EVT) const {
11708  return false;
11709 }
11710 
11711 static Value *UseTlsOffset(IRBuilder<> &IRB, unsigned Offset) {
11712  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
11713  Function *ThreadPointerFunc =
11714  Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
11715  return IRB.CreatePointerCast(
11716  IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), Offset),
11717  Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
11718 }
11719 
11720 Value *AArch64TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
11721  // Android provides a fixed TLS slot for the stack cookie. See the definition
11722  // of TLS_SLOT_STACK_GUARD in
11723  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
11724  if (Subtarget->isTargetAndroid())
11725  return UseTlsOffset(IRB, 0x28);
11726 
11727  // Fuchsia is similar.
11728  // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
11729  if (Subtarget->isTargetFuchsia())
11730  return UseTlsOffset(IRB, -0x10);
11731 
11732  return TargetLowering::getIRStackGuard(IRB);
11733 }
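Conceptually, the Android address computed above is the thread pointer plus the fixed slot offset; the sketch below assumes __builtin_thread_pointer() reflects TPIDR_EL0 and is not code from this file.

// Android stack cookie: TLS_SLOT_STACK_GUARD lives at thread pointer + 0x28.
// Fuchsia uses -0x10 instead.
static void **stackGuardSlotAndroid() {
  return (void **)((char *)__builtin_thread_pointer() + 0x28);
}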
11734 
11735 void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
11736  // MSVC CRT provides functionalities for stack protection.
11737  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
11738  // MSVC CRT has a global variable holding security cookie.
11739  M.getOrInsertGlobal("__security_cookie",
11740  Type::getInt8PtrTy(M.getContext()));
11741 
11742  // MSVC CRT has a function to validate security cookie.
11743  auto *SecurityCheckCookie = cast<Function>(
11744  M.getOrInsertFunction("__security_check_cookie",
11745  Type::getVoidTy(M.getContext()),
11746  Type::getInt8PtrTy(M.getContext())));
11747  SecurityCheckCookie->setCallingConv(CallingConv::Win64);
11748  SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
11749  return;
11750  }
11751  TargetLowering::insertSSPDeclarations(M);
11752 }
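Roughly, the declarations inserted above correspond to the following C++ view of the MSVC CRT interface (a sketch; the actual definitions live in the CRT):

extern "C" void *__security_cookie;              // global cookie value
extern "C" void __security_check_cookie(void *); // validation routine, arg passed in a register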
11753 
11754 Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const {
11755  // MSVC CRT has a global variable holding security cookie.
11756  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
11757  return M.getGlobalVariable("__security_cookie");
11758  return TargetLowering::getSDagStackGuard(M);
11759 }
11760 
11761 Value *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
11762  // MSVC CRT has a function to validate security cookie.
11763  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
11764  return M.getFunction("__security_check_cookie");
11765  return TargetLowering::getSSPStackGuardCheck(M);
11766 }
11767 
11768 Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
11769  // Android provides a fixed TLS slot for the SafeStack pointer. See the
11770  // definition of TLS_SLOT_SAFESTACK in
11771  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
11772  if (Subtarget->isTargetAndroid())
11773  return UseTlsOffset(IRB, 0x48);
11774 
11775  // Fuchsia is similar.
11776  // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
11777  if (Subtarget->isTargetFuchsia())
11778  return UseTlsOffset(IRB, -0x8);
11779 
11780  return TargetLowering::getSafeStackPointerLocation(IRB);
11781 }
11782 
11783 bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
11784  const Instruction &AndI) const {
11785  // Only sink 'and' mask to cmp use block if it is masking a single bit, since
11786  // this is likely to fold the and/cmp/br into a single tbz instruction. It
11787  // may be beneficial to sink in other cases, but we would have to check that
11788  // the cmp would not get folded into the br to form a cbz for these to be
11789  // beneficial.
11790  ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
11791  if (!Mask)
11792  return false;
11793  return Mask->getValue().isPowerOf2();
11794 }
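For example (illustration only; `hit` and `miss` are hypothetical callees), the first test below masks a single bit, so sinking the and next to the compare lets it fold into a tbnz; the second mask has two bits set and is left where it is.

extern void hit();
extern void miss();

void singleBit(unsigned x) {
  if (x & 0x10)   // power-of-two mask: and/cmp/br can become a single tbnz
    hit();
}

void multiBit(unsigned x) {
  if (x & 0x18)   // two bits set: stays as a test plus conditional branch
    miss();
}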
11795 
11796 void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
11797  // Update IsSplitCSR in AArch64FunctionInfo.
11798  AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
11799  AFI->setIsSplitCSR(true);
11800 }
11801 
11802 void AArch64TargetLowering::insertCopiesSplitCSR(
11803  MachineBasicBlock *Entry,
11804  const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
11805  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
11806  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
11807  if (!IStart)
11808  return;
11809 
11810  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11811  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
11812  MachineBasicBlock::iterator MBBI = Entry->begin();
11813  for (const MCPhysReg *I = IStart; *I; ++I) {
11814  const TargetRegisterClass *RC = nullptr;
11815  if (AArch64::GPR64RegClass.contains(*I))
11816  RC = &AArch64::GPR64RegClass;
11817  else if (AArch64::FPR64RegClass.contains(*I))
11818  RC = &AArch64::FPR64RegClass;
11819  else
11820  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
11821 
11822  unsigned NewVR = MRI->createVirtualRegister(RC);
11823  // Create copy from CSR to a virtual register.
11824  // FIXME: this currently does not emit CFI pseudo-instructions, it works
11825  // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
11826  // nounwind. If we want to generalize this later, we may need to emit
11827  // CFI pseudo-instructions.
11830  "Function should be nounwind in insertCopiesSplitCSR!");
11831  Entry->addLiveIn(*I);
11832  BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
11833  .addReg(*I);
11834 
11835  // Insert the copy-back instructions right before the terminator.
11836  for (auto *Exit : Exits)
11837  BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
11838  TII->get(TargetOpcode::COPY), *I)
11839  .addReg(NewVR);
11840  }
11841 }
11842 
11843 bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
11844  // Integer division on AArch64 is expensive. However, when aggressively
11845  // optimizing for code size, we prefer to use a div instruction, as it is
11846  // usually smaller than the alternative sequence.
11847  // The exception to this is vector division. Since AArch64 doesn't have vector
11848  // integer division, leaving the division as-is is a loss even in terms of
11849  // size, because it will have to be scalarized, while the alternative code
11850  // sequence can be performed in vector form.
11851  bool OptSize =
11852  Attr.hasAttribute(AttributeList::FunctionIndex, Attribute::MinSize);
11853  return OptSize && !VT.isVector();
11854 }
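A short illustration of the trade-off described above (not part of the file): at minimum size a single scalar sdiv beats the multiply/shift expansion of a constant division, whereas a vector division has no AArch64 instruction and is scalarized either way.

// Scalar: keeping the sdiv is the smaller encoding at -Oz.
int byConst(int x) { return x / 7; }

// Vectorizable loop: there is no SIMD integer divide, so this is scalarized
// regardless, which is why the hook returns false for vector types.
void divEach(int *a, const int *b, int n) {
  for (int i = 0; i < n; ++i)
    a[i] /= b[i];
}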
11855 
11856 bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
11857  return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
11858 }
11859 
11860 unsigned
11861 AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
11862  if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
11863  return getPointerTy(DL).getSizeInBits();
11864 
11865  return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
11866 }
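The 3 * pointer-size + 2 * 32 figure matches the AAPCS64 va_list layout, sketched below with field names taken from the ABI document (Darwin and Windows use a plain char*, hence pointer-size bits there): 3*64 + 2*32 = 256 bits.

// AAPCS64 va_list: three pointers plus two 32-bit offsets = 32 bytes.
struct va_list_aapcs64 {
  void *__stack;   // next stacked argument
  void *__gr_top;  // end of the general-purpose register save area
  void *__vr_top;  // end of the FP/SIMD register save area
  int __gr_offs;   // offset to the next GP argument
  int __vr_offs;   // offset to the next FP/SIMD argument
};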
11867 
11868 void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
11869  MF.getFrameInfo().computeMaxCallFrameSize(MF);
11870  TargetLoweringBase::finalizeLowering(MF);
11871 }
11872 
11873 // Unlike X86, we let frame lowering assign offsets to all catch objects.
11874 bool AArch64TargetLowering::needsFixedCatchObjects() const {
11875  return false;
11876 }
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs...
static bool isAdvSIMDModImmType6(uint64_t Imm)
Type * getVectorElementType() const
Definition: Type.h:371
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
void setFrameAddressIsTaken(bool T)
uint64_t CallInst * C
void setAllowReassociation(bool b)
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:571
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:349
X = FP_ROUND(Y, TRUNC) - Rounding &#39;Y&#39; from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:538
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned char TargetFlags=0)
Value * getValueOperand()
Definition: Instructions.h:410
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set, or Regs.size() if they are all allocated.
Helper structure to keep track of SetCC information.
static MVT getIntegerVT(unsigned BitWidth)
static bool isUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of "vector_shuffle v...
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:594
static SDValue performBitcastCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
EVT getValueType() const
Return the ValueType of the referenced return value.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address...
Value * CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1516
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
iterator_range< use_iterator > uses()
Definition: Value.h:355
static bool isConstant(const MachineInstr &MI)
bool isUndef() const
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:35
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand...
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
const GlobalValue * getGlobal() const
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1563
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
LLVMContext & Context
LLVMContext & getContext() const
Definition: IRBuilder.h:123
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
DiagnosticInfoOptimizationBase::Argument NV
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC&#39;s if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:937
bool isMisaligned128StoreSlow() const
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it&#39;s not CSE&#39;d)...
Definition: SelectionDAG.h:836
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG)
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector...
static SDValue performNEONPostLDSTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Target-specific DAG combine function for NEON load/store intrinsics to merge base address updates...
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an vector value) starting with the ...
Definition: ISDOpcodes.h:358
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:650
This class represents lattice values for constants.
Definition: AllocatorList.h:24
static bool isUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
static bool isTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of "vector_shuffle v...
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
static MVT getVectorVT(MVT VT, unsigned NumElements)
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0...
Definition: ISDOpcodes.h:605
Constant * getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition: Module.cpp:144
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:367
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:260
bool hasCustomCallingConv() const
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
LLVM_NODISCARD bool equals_lower(StringRef RHS) const
equals_lower - Check for string equality, ignoring case.
Definition: StringRef.h:176
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
bool isSized(SmallPtrSetImpl< Type *> *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:265
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
static SDValue tryCombineToEXTR(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
EXTR instruction extracts a contiguous chunk of bits from two existing registers viewed as a high/low...
static bool isAdvSIMDModImmType12(uint64_t Imm)
bool isOSBinFormatELF() const
Tests whether the OS uses the ELF binary format.
Definition: Triple.h:604
an instruction that atomically checks whether a specified value is in a memory location, and, if it is, stores a new value there.
Definition: Instructions.h:529
static bool isAdvSIMDModImmType4(uint64_t Imm)
bool isVector() const
Return true if this is a vector value type.
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, unsigned Align=1, bool *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const SDValue & getBasePtr() const
bool predictableSelectIsExpensive() const
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:223
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
static bool isINSMask(ArrayRef< int > M, int NumInputElements, bool &DstIsLeft, int &Anomaly)
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG)
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
static CondCode getInvertedCondCode(CondCode Code)
unsigned getReg() const
getReg - Returns the register number.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:138
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts, adds, and multiplies for this target.
const SDValue & getValue() const
static bool isZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of "vector_shuffle v...
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain...
Definition: ISDOpcodes.h:699
This class represents a function call, abstracting a target machine&#39;s calling convention.
unsigned Reg
unsigned char ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:648
static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG)
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit...
static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
MachineBasicBlock * EmitLoweredCatchRet(MachineInstr &MI, MachineBasicBlock *BB) const
static MVT getFloatingPointVT(unsigned BitWidth)
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:251
int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target...
The C convention as implemented on Windows/x86-64 and AArch64.
Definition: CallingConv.h:154
unsigned getVectorNumElements() const
const SDValue & getChain() const
Function Alias Analysis Results
This instruction constructs a fixed permutation of two input vectors.
static SDValue performSelectCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with the compare-mask instruct...
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, SelectionDAG &DAG)
unsigned getAlignment() const
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:321
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
GlobalVariable * getGlobalVariable(StringRef Name) const
Look up the specified global variable in the module symbol table.
Definition: Module.h:387
arg_iterator arg_end()
Definition: Function.h:680
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static uint32_t Concat[]
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:141
F(f)
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address...
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select&#39;s if you just have operands and don&#39;t want to check...
Definition: SelectionDAG.h:950
static SDValue performExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue NormalizeBuildVector(SDValue Op, SelectionDAG &DAG)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:685
An instruction for reading from memory.
Definition: Instructions.h:168
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG)
static unsigned getDUPLANEOp(EVT EltType)
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL) const
Soften the operands of a comparison.
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:437
static bool isAdvSIMDModImmType3(uint64_t Imm)
[US]{MIN/MAX} - Binary minimum or maximum or signed or unsigned integers.
Definition: ISDOpcodes.h:384
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:876
const SDNodeFlags getFlags() const
an instruction that atomically reads a memory location, combines it with another value, and then stores the result back.
Definition: Instructions.h:692
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags=0)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG)
This defines the Use class.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, AArch64CC::CondCode CC, bool NoNans, EVT VT, const SDLoc &dl, SelectionDAG &DAG)
Same for subtraction.
Definition: ISDOpcodes.h:254
bool shouldConsiderGEPOffsetSplit() const override
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
static SDValue tryCombineFixedPointConvert(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1 at the ...
Definition: ISDOpcodes.h:353
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:189
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG)
static std::pair< SDValue, SDValue > getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG)
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
bool isAnyArgRegReserved(const MachineFunction &MF) const
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it...
bool isCallingConvWin64(CallingConv::ID CC) const
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:435
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const
Return true if the attribute exists at the given index.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:998
static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG)
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
return AArch64::GPR64RegClass contains(Reg)
SDValue getExternalSymbol(const char *Sym, EVT VT)
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:130
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:159
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool isMemLoc() const
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode *> &Visited, SmallVectorImpl< const SDNode *> &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
bool needsCustom() const
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address...
unsigned getFrameRegister(const MachineFunction &MF) const override
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:210
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1135
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:136
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
Definition: ISDOpcodes.h:456
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1632
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
static bool isAdvSIMDModImmType7(uint64_t Imm)
static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG)
static bool isTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
bool hasOneUse() const
Return true if there is exactly one use of this node.
cl::opt< bool > EnableAArch64ELFLocalDynamicTLSGeneration("aarch64-elf-ldtls-generation", cl::Hidden, cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false))
A description of a memory reference used in the backend.
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:516
static SDValue performTBZCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, unsigned CCIndex, unsigned CmpIndex)
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:371
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static void ReplaceCMP_SWAP_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
const HexagonInstrInfo * TII
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:369
static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG)
static Value * UseTlsOffset(IRBuilder<> &IRB, unsigned Offset)
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
Shift and rotation operations.
Definition: ISDOpcodes.h:410
static SDValue performAddSubLongCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
Class to represent struct types.
Definition: DerivedTypes.h:201
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth...
Definition: ISDOpcodes.h:393
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Fold a floating-point multiply by power of two into floating-point to fixed-point conversion...
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:652
BinOp getOperation() const
Definition: Instructions.h:745
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
CallLoweringInfo & setChain(SDValue InChain)
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:191
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:170
unsigned SubReg
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
op_iterator op_end() const
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
uint64_t getConstantOperandVal(unsigned i) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
#define INT64_MAX
Definition: DataTypes.h:77
Value * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ISD::SETCC ValueType.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Helper structure to keep track of a SET_CC lowered into AArch64 code.
FLT_ROUNDS_ - Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest 2 Round to ...
Definition: ISDOpcodes.h:546
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
This file contains the simple types necessary to represent the attributes associated with functions a...
SimpleValueType SimpleTy
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:304
bool isInConsecutiveRegs() const
The memory access is dereferenceable (i.e., doesn&#39;t trap).
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:460
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N, SelectionDAG &DAG)
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:401
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This class is used to represent EVT&#39;s, which are used to parameterize some operations.
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
APInt getHiBits(unsigned numBits) const
Compute an APInt containing numBits highbits from this APInt.
Definition: APInt.cpp:511
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Turn vector tests of the signbit in the form of: xor (sra X, elt_size(X)-1), -1 into: cmge X...
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG...
Definition: ISDOpcodes.h:73
const BlockAddress * getBlockAddress() const
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE R Default(T Value)
Definition: StringSwitch.h:203
This is an SDNode representing atomic operations.
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
LocInfo getLocInfo() const
#define im(i)
static bool isAdvSIMDModImmType5(uint64_t Imm)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
static StructType * get(LLVMContext &Context, ArrayRef< Type *> Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:342
ELFYAML::ELF_STO Other
Definition: ELFYAML.cpp:784
This file implements a class to represent arbitrary precision integral constant values and operations...
bool is64BitVector() const
Return true if this is a 64-bit vector type.
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
This represents a list of ValueType&#39;s that has been intern&#39;d by a SelectionDAG.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
AtomicOrdering
Atomic ordering for LLVM&#39;s memory model.
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:695
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
unsigned getSizeInBits() const
Value * getSafeStackPointerLocation(IRBuilder<> &IRB) const override
If the target has a standard location for the unsafe stack pointer, returns the address of that locat...
static const unsigned PerfectShuffleTable[6561+1]
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1727
int64_t getSExtValue() const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:419
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:298
void setArgumentStackToRestore(unsigned bytes)
This is a fast-path instruction selection class that generates poor code and doesn&#39;t support illegal ...
Definition: FastISel.h:67
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
#define UINT64_MAX
Definition: DataTypes.h:83
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1732
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1575
Constant * createSequentialMask(IRBuilder<> &Builder, unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
AtomicOrdering getOrdering() const
Return the atomic ordering requirements for this memory operation.
static SDValue performConcatVectorsCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
CATCHPAD - Represents a catchpad instruction.
Definition: ISDOpcodes.h:681
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:398
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:478
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
static bool isEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseEXT, unsigned &Imm)
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Definition: APFloat.h:1167
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
SDValue getRegisterMask(const uint32_t *RegMask)
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:411
const AArch64RegisterInfo * getRegisterInfo() const override
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:402
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:121
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:429
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
static bool isOverflowIntrOpRes(SDValue Op)
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:138
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
bool enableAggressiveFMAFusion(EVT VT) const override
Enable aggressive FMA fusion on targets that want it.
static bool isAdvSIMDModImmType2(uint64_t Imm)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:852
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:224
An instruction for storing to memory.
Definition: Instructions.h:321
Natural vector cast.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:959
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
op_iterator op_begin() const
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification, or lowering of the constant.
Definition: ISDOpcodes.h:125
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1659
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:576
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:747
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:25
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override
Returns true if the target can instruction select the specified FP immediate natively.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements. ...
Definition: SelectionDAG.h:751
ArrayRef< SDUse > ops() const
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass...
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp, SelectionDAG &DAG)
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG)
static const MCPhysReg GPRArgRegs[]
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
bool isXRegisterReserved(size_t i) const
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1742
amdgpu Simplify well known AMD library false Value * Callee
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1020
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void initializeSplitCSR(MachineBasicBlock *Entry) const override
Perform necessary initialization to handle a subset of CSRs explicitly via copies.
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override
For some targets, an LLVM struct type must be broken down into multiple simple types, but the calling convention specifies that the entire struct must be passed in a block of consecutive registers.
bool supportsAddressTopByteIgnored() const
CPU has TBI (top byte of addresses is ignored during HW address translation) and OS enables it...
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
MVT getVectorElementType() const
Value * getOperand(unsigned i) const
Definition: User.h:170
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
static bool isREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize...
static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG)
Class to represent pointers.
Definition: DerivedTypes.h:467
unsigned getByValSize() const
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
static bool isEssentiallyExtractSubvector(SDValue N)
This class is used to represent ISD::STORE nodes.
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:524
static SDValue performBRCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:118
AArch64SetCCInfo AArch64
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:327
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1182
TargetInstrInfo - Interface to description of machine instruction set.
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:636
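For illustration only, a minimal standalone sketch (not taken from this file; the helper name is made up) of what getHighBitsSet produces, assuming llvm/ADT/APInt.h is on the include path:

#include "llvm/ADT/APInt.h"
#include <cassert>

// Hypothetical example: build a 32-bit mask whose top 8 bits are set.
static void highBitsSetExample() {
  llvm::APInt M = llvm::APInt::getHighBitsSet(32, 8);
  assert(M.getZExtValue() == 0xFF000000u); // bits 24..31 are 1
  assert(M.countPopulation() == 8);        // exactly 8 bits set
}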
bool isLegalICmpImmediate(int64_t) const override
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:229
static bool isAdvSIMDModImmType9(uint64_t Imm)
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded...
bool isZero() const
Return true if the value is positive or negative zero.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:567
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
The memory access is volatile.
virtual Value * getIRStackGuard(IRBuilder<> &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:610
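A hedged usage sketch (illustrative only, not code from this file): MinAlign returns the largest power of two that divides both arguments, which is why it is used to combine alignments or offsets.

#include "llvm/Support/MathExtras.h"
#include <cassert>

// Hypothetical helper, for illustration only.
static void minAlignExample() {
  assert(llvm::MinAlign(16, 24) == 8); // common alignment of 16 and 24
  assert(llvm::MinAlign(8, 12) == 4);  // common alignment of 8 and 12
}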
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:750
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
const SDValue & getBasePtr() const
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
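A small, hypothetical usage sketch (the function below is not part of this file) showing the typical StringSwitch pattern of chained Case calls ending in Default:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

// Map a made-up register-class letter to a bit width; unknown strings
// fall through to the Default value.
static unsigned widthForClass(llvm::StringRef Name) {
  return llvm::StringSwitch<unsigned>(Name)
      .Case("w", 32)
      .Case("x", 64)
      .Default(0);
}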
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:166
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Returns false if N is a bit extraction pattern of (X >> C) & Mask.
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:629
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address...
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
bool requiresStrictAlign() const
const MachineRegisterInfo *MRI
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:120
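A minimal sketch (illustrative only, assuming llvm/Support/MathExtras.h; the function name is hypothetical):

#include "llvm/Support/MathExtras.h"
#include <cassert>

static void ctzExample() {
  // 0b1000 has three trailing zero bits.
  assert(llvm::countTrailingZeros(8u) == 3);
  // With the default ZB_Width behavior, an all-zero input yields the bit width.
  assert(llvm::countTrailingZeros(0u) == 32);
}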
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
unsigned countPopulation() const
Count the number of bits set.
Definition: APInt.h:1658
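As a quick illustration (a made-up helper, not from this file) of countPopulation on an APInt:

#include "llvm/ADT/APInt.h"
#include <cassert>

static void popCountExample() {
  // 0xF0F0 viewed as a 16-bit APInt has eight bits set.
  assert(llvm::APInt(16, 0xF0F0).countPopulation() == 8);
}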
bool isZExtFree(Type *Ty1, Type *Ty2) const override
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
Machine Value Type.
Value * concatenateVectors(IRBuilder<> &Builder, ArrayRef< Value *> Vecs)
Concatenate a list of vectors.
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
GenericSetCCInfo Generic
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
Simple binary floating point operators.
Definition: ISDOpcodes.h:283
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:273
MachineBasicBlock * EmitLoweredCatchPad(MachineInstr &MI, MachineBasicBlock *BB) const
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE...
Definition: ISDOpcodes.h:728
iterator_range< value_op_iterator > op_values() const
const SDValue & getOperand(unsigned Num) const
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:934
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:332
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:224
static const MVT MVT_CC
Value type used for condition codes.
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:232
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
Definition: SelectionDAG.h:824
unsigned getPrefFunctionAlignment() const
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isAcquireOrStronger(AtomicOrdering ao)
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC) const
Selects the correct CCAssignFn for a given CallingConvention value.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static bool isSetCCOrZExtSetCC(const SDValue &Op, SetCCInfoAndKind &Info)
static EVT getExtensionTo64Bits(const EVT &OrigVT)
static std::pair< SDValue, SDValue > splitInt128(SDValue N, SelectionDAG &DAG)
static bool isAdvSIMDModImmType1(uint64_t Imm)
CombineLevel
Definition: DAGCombine.h:16
static mvt_range fp_valuetypes()
static void ReplaceReductionResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, unsigned InterOp, unsigned AcrossOp)
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:434
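A hedged sketch (illustrative only; the helper is hypothetical) of isPowerOf2_64, which requires exactly one bit set and excludes zero:

#include "llvm/Support/MathExtras.h"
#include <cassert>

static void powerOfTwoExample() {
  assert(llvm::isPowerOf2_64(64));  // exactly one bit set
  assert(!llvm::isPowerOf2_64(48)); // two bits set
  assert(!llvm::isPowerOf2_64(0));  // zero is excluded
}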
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:161
This class provides iterator support for SDUse operands that use a specific SDNode.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself...
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side...
static unsigned getCmpOperandFoldingProfit(SDValue Op)
Returns how profitable it is to fold a comparison's operand's shift and/or extension operations...
bool hasNoNaNs() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call...
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
bool CombineTo(SDValue O, SDValue N)
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using &#39;From&#39; to use &#39;To&#39; instead.
void computeMaxCallFrameSize(const MachineFunction &MF)
Computes the maximum size of a callframe and the AdjustsStack property.
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:767
const APInt & getAPIntValue() const
std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
Definition: ValueTypes.cpp:115
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline...
const Triple & getTargetTriple() const
Value * getPointerOperand()
Definition: Instructions.h:285
static bool isAdvSIMDModImmType8(uint64_t Imm)
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
void setPrefFunctionAlignment(unsigned Align)
Set the target's preferred function alignment.
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
static mvt_range vector_valuetypes()
arg_iterator arg_begin()
Definition: Function.h:671
self_iterator getIterator()
Definition: ilist_node.h:82
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
The memory access is non-temporal.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y)...
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:281
SmallVectorImpl< ForwardedRegister > & getForwardedMustTailRegParms()
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:719
static void changeFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1214
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all...
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
unsigned MaxStoresPerMemmove
Specify maximum bytes of store instructions per memmove call.
bool isOSBinFormatMachO() const
Tests whether the environment is MachO.
Definition: Triple.h:614
Helper structure to be able to read SetCC information.
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo...
Definition: ISDOpcodes.h:796
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:556
static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
std::vector< ArgListEntry > ArgListTy
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address...
virtual Value * getSafeStackPointerLocation(IRBuilder<> &IRB) const
Returns the target-specific address of the unsafe stack pointer.
Extended Value Type.
Definition: ValueTypes.h:34
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:398
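A minimal sketch (not from this file; helper name is made up) of isIntN against the signed 8-bit range [-128, 127]:

#include "llvm/Support/MathExtras.h"
#include <cassert>

static void isIntNExample() {
  assert(llvm::isIntN(8, 127));   // fits in signed i8
  assert(llvm::isIntN(8, -128));  // lower bound also fits
  assert(!llvm::isIntN(8, 128));  // out of range
}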
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
static bool isSingletonEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2083
This structure contains all information that is necessary for lowering calls.
size_t size() const
Definition: SmallVector.h:53
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1207
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:220
bool isVolatile() const
static SDValue GenerateTBL(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST, EVT VT, EVT MemVT, SelectionDAG &DAG)
const TargetMachine & getTargetMachine() const
This class contains a discriminated union of information about pointers in memory operands...
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
unsigned getNumOperands() const
Return the number of values used by this operation.
static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1655
static SDValue performSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
unsigned first
static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, bool &FromHi)
An EXTR instruction is made up of two shifts, ORed together.
The memory access writes data.
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:497
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass...
static cl::opt< bool > EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, cl::desc("Allow AArch64 SLI/SRI formation"), cl::init(false))
bool isReleaseOrStronger(AtomicOrdering ao)
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type...
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:639
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition: APInt.h:947
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:633
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef...
void dump() const
Dump this node, for debugging.
static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:404
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst *> Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a ldN intrinsic.
Iterator for intrusive lists based on ilist_node.
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const override
Return true if it is profitable to reduce a load to a smaller type.
CCState - This class holds information needed while lowering arguments and return values...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
static unsigned getIntrinsicID(const SDNode *N)
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, SDValue SplatVal, unsigned NumVecElts)
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const override
Returns the target specific optimal type for load and store operations as a result of memset...
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:339
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:265
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:213
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:222
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override
EVT is not used in-tree, but is used by out-of-tree targets.
static bool isCMN(SDValue Op, ISD::CondCode CC)
AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI)
unsigned getMaximumJumpTableSize() const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
Module.h This file contains the declarations for the Module class.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:64
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
unsigned getMaximumJumpTableSize() const
Return upper limit for number of entries in a jump table.
static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, unsigned AlignCheck)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:734
Provides information about what library functions are available for the current target.
static bool isLegalArithImmed(uint64_t C)
void dump() const
CCValAssign - Represent assignment of one arg/retval to a location.
static unsigned getExtFactor(SDValue &V)
getExtFactor - Determine the adjustment factor for the position when generating an "extract from vect...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:1044
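A short illustrative sketch (the helper and array below are hypothetical, not from this file) of array_lengthof deducing the element count from a C array's type:

#include "llvm/ADT/STLExtras.h"
#include <cassert>

static void arrayLengthofExample() {
  static const int Regs[] = {0, 1, 2, 3};
  assert(llvm::array_lengthof(Regs) == 4); // deduced from the array type
}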
iterator end() const
Definition: ArrayRef.h:138
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:96
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:644
An SDNode that represents everything that will be needed to construct a MachineInstr.
static bool isAllConstantBuildVector(const SDValue &PotentialBVec, uint64_t &ConstVal)
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:710
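A minimal usage sketch (illustrative only; the helper is made up) showing that slice takes a half-open [Start, End) range:

#include "llvm/ADT/StringRef.h"
#include <cassert>

static void sliceExample() {
  llvm::StringRef S("aarch64-lower");
  assert(S.slice(0, 7) == "aarch64"); // characters 0..6
}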
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:413
This is an abstract virtual class for memory operations.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
const Constant * getConstVal() const
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2068
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG)
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:622
Helper structure to keep track of ISD::SET_CC operands.
bool hasMustTailInVarArgFunc() const
Returns true if the function is variadic and contains a musttail call.
Represents one node in the SelectionDAG.
bool isProfitableToHoist(Instruction *I) const override
Check if it is profitable to hoist instruction in then/else to if.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isSignExtended(SDNode *N, SelectionDAG &DAG)
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
int64_t getImm() const
bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const override
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:679
const Function & getFunction() const
Return the LLVM function that this machine code represents.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a stN intrinsic.
static void changeFPCCToANDAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert a DAG fp condition code to an AArch64 CC.
unsigned logBase2() const
Definition: APInt.h:1748
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
static mvt_range integer_valuetypes()
static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Fold a floating-point divide by power of two into fixed-point to floating-point conversion.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:539
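A hedged sketch (not code from this file; the helper is hypothetical) of Log2_32 taking the floor of the base-2 logarithm:

#include "llvm/Support/MathExtras.h"
#include <cassert>

static void log2Example() {
  // Both 32 and 47 fall in the [32, 64) bucket, so floor(log2) is 5.
  assert(llvm::Log2_32(32) == 5);
  assert(llvm::Log2_32(47) == 5);
}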
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:176
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
static bool isZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:164
Class to represent vector types.
Definition: DerivedTypes.h:393
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns true if the given (atomic) store should be expanded by the IR-level AtomicExpand pass into an...
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:56
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT...
Definition: ValueTypes.h:73
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
EVT getMemoryVT() const
Return the type of the in-memory value.
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
static bool performTBISimplification(SDValue Addr, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Simplify Addr given that the top byte of it is ignored by HW during address translation.
Class for arbitrary precision integers.
Definition: APInt.h:70
unsigned getByValAlign() const
CodeModel::Model getCodeModel() const
Returns the code model.
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Value * getIRStackGuard(IRBuilder<> &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
iterator_range< use_iterator > uses()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:420
void setMinFunctionAlignment(unsigned Align)
Set the target's minimum function alignment (in log2(bytes))
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2, bool &Invert)
changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC usable with the vector...
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:241
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:464
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static use_iterator use_end()
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
void setPrefLoopAlignment(unsigned Align)
Set the target's preferred loop alignment.
void setMaximumJumpTableSize(unsigned)
Indicate the maximum number of entries in jump tables.
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:468
void UpdateCustomCallPreservedMask(MachineFunction &MF, const uint32_t **Mask) const
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1103
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:471
static SDValue performNVCASTCombine(SDNode *N)
Get rid of unnecessary NVCASTs (that don't change the type).
bool isTailCall() const
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:70
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:478
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:312
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1778
const uint32_t * getWindowsStackProbePreservedMask() const
Stack probing calls preserve different CSRs to the normal CC.
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG)
virtual Value * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
CATCHRET - Represents a return from a catch block funclet.
Definition: ISDOpcodes.h:686
static SDValue performPostLD1Combine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, bool IsLaneOp)
Target-specific DAG combine function for post-increment LD1 (lane) and post-increment LD1R...
Flags
Flags values. These may be or'd together.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:568
static bool canEmitConjunction(const SDValue Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/SETCC operations that can be expressed as a conjunction...
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
static bool isEquivalentMaskless(unsigned CC, unsigned width, ISD::LoadExtType ExtType, int AddConstant, int CompConstant)
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Selects the correct CCAssignFn for a given CallingConvention value.
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1760
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:436
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:638
static mvt_range all_valuetypes()
SimpleValueType Iteration.
Representation of each machine instruction.
Definition: MachineInstr.h:64
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the source.
Definition: ISDOpcodes.h:724
void UpdateCustomCalleeSavedRegs(MachineFunction &MF) const
static bool isAdvSIMDModImmType10(uint64_t Imm)
#define FALKOR_STRIDED_ACCESS_MD
void emitReservedArgRegCallError(const MachineFunction &MF) const
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
These are IR-level optimization flags that may be propagated to SDNodes.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:673
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:177
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:423
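A brief illustrative sketch (helper name is made up, not from this file) of isShiftedMask_64, which accepts exactly one non-empty contiguous run of ones:

#include "llvm/Support/MathExtras.h"
#include <cassert>

static void shiftedMaskExample() {
  assert(llvm::isShiftedMask_64(0x00FF0000));  // one contiguous run of ones
  assert(!llvm::isShiftedMask_64(0x00FF00FF)); // two separate runs
  assert(!llvm::isShiftedMask_64(0));          // the empty mask does not count
}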
unsigned getNumArgOperands() const
Definition: InstrTypes.h:1133
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
MachineBasicBlock * EmitF128CSEL(MachineInstr &MI, MachineBasicBlock *BB) const
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG)
Constant * getOrInsertGlobal(StringRef Name, Type *Ty, function_ref< GlobalVariable *()> CreateGlobalCallback)
Look up the specified global in the module symbol table.
Definition: Module.cpp:206
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:387
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:149
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power of 2, return the log base 2 integer value.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:705
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, bool isSigned, const SDLoc &dl, bool doesNotReturn=false, bool isReturnValueUsed=true) const
Returns a pair of (return value, chain).
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool isPosZero() const
Definition: APFloat.h:1158
unsigned getLocMemOffset() const
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
This method returns a target specific FastISel object, or null if the target does not support "fast" ...
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:206
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "...
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:182
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:486
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:614
PointerUnion< const Value *, const PseudoSourceValue * > ptrVal
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
TargetOptions Options
Definition: TargetMachine.h:97
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG)
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:403
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Flags getFlags() const
Return the raw flags of the source value,.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1213
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:595
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page...
The memory access always returns the same value (or traps).
unsigned MaxStoresPerMemmoveOptSize
Maximum number of store instructions that may be substituted for a call to memmove, used for functions with OptSize attribute.
unsigned MaxStoresPerMemcpyOptSize
Maximum number of store operations that may be substituted for a call to memcpy, used for functions w...
Value * emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type...
void setStackPointerRegisterToSaveRestore(unsigned R)
If set to a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save and restore.
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert, SelectionDAG &DAG)
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, const APInt &Demanded, TargetLowering::TargetLoweringOpt &TLO, unsigned NewOpc)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
Type * getValueType() const
Definition: GlobalValue.h:276
static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Same for multiplication.
Definition: ISDOpcodes.h:257
static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a splat of a scalar to a vector store by scalar stores of the scalar value.
static const int LAST_INDEXED_MODE
Definition: ISDOpcodes.h:922
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1974
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool needsFixedCatchObjects() const override
Used for exception handling on Win64.
unsigned getOpcode() const
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:608
SDValue getValue(unsigned R) const
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
const uint32_t * getTLSCallPreservedMask() const
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
unsigned MaxStoresPerMemcpy
Specify maximum bytes of store instructions per memcpy call.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
AArch64CC::CondCode CC
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1722
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
void setBytesInStackArgArea(unsigned bytes)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
bool isRegLoc() const
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value *PtrVal, unsigned Alignment, AtomicOrdering Ordering, SyncScope::ID SSID)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands...
static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a splat of zeros to a vector store by scalar stores of WZR/XZR.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:291
const MachinePointerInfo & getPointerInfo() const
bool isLegalAddImmediate(int64_t) const override
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
ConstantSDNode * getConstantSplatNode(BitVector *UndefElements=nullptr) const
Returns the splatted constant or null if this is not a constant splat.
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:345
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if &#39;Op & Mask&#39; is known to be zero.
void insert(iterator MBBI, MachineBasicBlock *MBB)
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC)
changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 CC
MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo)
Check whether or not Op is a SET_CC operation, either a generic or an AArch64 lowered one...
void setReturnAddressIsTaken(bool s)
unsigned getVaListSizeInBits(const DataLayout &DL) const override
Returns the size of the platform's va_list object.
bool hasBasePointer(const MachineFunction &MF) const
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
ArrayRef< int > getMask() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
LLVM Value Representation.
Definition: Value.h:73
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1299
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:302
SDValue getRegister(unsigned Reg, EVT VT)
unsigned getResNo() const
get the index which selects a specific result in the SDNode
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:873
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
const AArch64InstrInfo * getInstrInfo() const override
static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:606
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue getValueType(EVT)
Value * emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:776
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, bool isSigned)
bool isUndef() const
Return true if the type of the node type undefined.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1124
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
Type * getElementType() const
Definition: DerivedTypes.h:360
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:413
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow...
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:443
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
unsigned MaxStoresPerMemsetOptSize
Maximum number of store operations that may be substituted for the call to memset, used for functions with OptSize attribute.
static bool isConcatMask(ArrayRef< int > Mask, EVT VT, bool SplitLHS)
static bool isAdvSIMDModImmType11(uint64_t Imm)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const override
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1967
unsigned getNumOperands() const
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:198
static bool isVolatile(Instruction *Inst)
Conversion operators.
Definition: ISDOpcodes.h:465
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
Provide custom lowering hooks for some operations.
unsigned char classifyGlobalFunctionReference(const GlobalValue *GV, const TargetMachine &TM) const
const SDValue & getOperand(unsigned i) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
unsigned getLocReg() const
static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget, const TargetMachine &TM)
uint64_t getZExtValue() const
bool isBigEndian() const
Definition: DataLayout.h:222
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:474
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:126
static void Split(std::vector< std::string > &V, StringRef S)
Splits a string of comma-separated items into a vector of strings.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:393
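A minimal sketch (illustrative only; the helper is hypothetical) of isUIntN against the unsigned 8-bit range [0, 255]:

#include "llvm/Support/MathExtras.h"
#include <cassert>

static void isUIntNExample() {
  assert(llvm::isUIntN(8, 255));  // fits in unsigned i8
  assert(!llvm::isUIntN(8, 256)); // out of range
}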
#define LLVM_DEBUG(X)
Definition: Debug.h:123
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand *> NewMemRefs)
Mutate the specified machine node&#39;s memory references to the provided list.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops...
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:584
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
VectorType * getType() const
Overload to return most specific vector type.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
Value * getPointerOperand()
Definition: Instructions.h:413
static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode, SDValue Operand, SelectionDAG &DAG, int &ExtraSteps)
static SDValue performSRLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static cl::opt< bool > EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, cl::desc("Enable AArch64 logical imm instruction " "optimization"), cl::init(true))
const SDValue & getBasePtr() const
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:545
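A tiny sketch (not from this file) of the floor-log2 behaviour; the inputs are arbitrary.

#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  errs() << Log2_64(1) << "\n";    // 0
  errs() << Log2_64(4096) << "\n"; // 12
  errs() << Log2_64(4097) << "\n"; // still 12: the result is floored
  return 0;
}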
static const MachineMemOperand::Flags MOStridedAccess
LLVMContext * getContext() const
Definition: SelectionDAG.h:407
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:375
static bool isSplatMask(const int *Mask, EVT VT)
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, APInt &UndefBits)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Type * getElementType() const
Definition: DerivedTypes.h:486
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predicted right.
unsigned getPrefLoopAlignment() const
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
static bool canGuaranteeTCO(CallingConv::ID CC)
Return true if the calling convention is one that we can guarantee TCO for.
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:221
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
void insertCopiesSplitCSR(MachineBasicBlock *Entry, const SmallVectorImpl< MachineBasicBlock *> &Exits) const override
Insert explicit copies in entry and exit blocks.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:622
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG)
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
gep_type_iterator gep_type_begin(const User *GEP)
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:914
BRIND - Indirect branch.
Definition: ISDOpcodes.h:634
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:380
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
This class is used to represent ISD::LOAD nodes.
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary...
Definition: ISDOpcodes.h:623
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, const SDLoc &dl)