LLVM  8.0.1
AMDGPUISelLowering.cpp
Go to the documentation of this file.
1 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This is the parent TargetLowering class for hardware code gen
12 /// targets.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #define AMDGPU_LOG2E_F 1.44269504088896340735992468100189214f
17 #define AMDGPU_LN2_F 0.693147180559945309417232121458176568f
18 #define AMDGPU_LN10_F 2.30258509299404568401799145468436421f
19 
20 #include "AMDGPUISelLowering.h"
21 #include "AMDGPU.h"
22 #include "AMDGPUCallLowering.h"
23 #include "AMDGPUFrameLowering.h"
24 #include "AMDGPUIntrinsicInfo.h"
25 #include "AMDGPURegisterInfo.h"
26 #include "AMDGPUSubtarget.h"
27 #include "AMDGPUTargetMachine.h"
28 #include "Utils/AMDGPUBaseInfo.h"
30 #include "SIInstrInfo.h"
31 #include "SIMachineFunctionInfo.h"
33 #include "llvm/CodeGen/Analysis.h"
39 #include "llvm/IR/DataLayout.h"
40 #include "llvm/IR/DiagnosticInfo.h"
41 #include "llvm/Support/KnownBits.h"
42 using namespace llvm;
43 
44 static bool allocateCCRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
45  CCValAssign::LocInfo LocInfo,
46  ISD::ArgFlagsTy ArgFlags, CCState &State,
47  const TargetRegisterClass *RC,
48  unsigned NumRegs) {
49  ArrayRef<MCPhysReg> RegList = makeArrayRef(RC->begin(), NumRegs);
50  unsigned RegResult = State.AllocateReg(RegList);
51  if (RegResult == AMDGPU::NoRegister)
52  return false;
53 
54  State.addLoc(CCValAssign::getReg(ValNo, ValVT, RegResult, LocVT, LocInfo));
55  return true;
56 }
57 
58 static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
59  CCValAssign::LocInfo LocInfo,
60  ISD::ArgFlagsTy ArgFlags, CCState &State) {
61  switch (LocVT.SimpleTy) {
62  case MVT::i64:
63  case MVT::f64:
64  case MVT::v2i32:
65  case MVT::v2f32:
66  case MVT::v4i16:
67  case MVT::v4f16: {
68  // Up to SGPR0-SGPR39
69  return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
70  &AMDGPU::SGPR_64RegClass, 20);
71  }
72  default:
73  return false;
74  }
75 }
76 
77 // Allocate up to VGPR31.
78 //
79 // TODO: Since there are no VGPR alignent requirements would it be better to
80 // split into individual scalar registers?
81 static bool allocateVGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
82  CCValAssign::LocInfo LocInfo,
83  ISD::ArgFlagsTy ArgFlags, CCState &State) {
84  switch (LocVT.SimpleTy) {
85  case MVT::i64:
86  case MVT::f64:
87  case MVT::v2i32:
88  case MVT::v2f32:
89  case MVT::v4i16:
90  case MVT::v4f16: {
91  return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
92  &AMDGPU::VReg_64RegClass, 31);
93  }
94  case MVT::v4i32:
95  case MVT::v4f32:
96  case MVT::v2i64:
97  case MVT::v2f64: {
98  return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
99  &AMDGPU::VReg_128RegClass, 29);
100  }
101  case MVT::v8i32:
102  case MVT::v8f32: {
103  return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
104  &AMDGPU::VReg_256RegClass, 25);
105 
106  }
107  case MVT::v16i32:
108  case MVT::v16f32: {
109  return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
110  &AMDGPU::VReg_512RegClass, 17);
111 
112  }
113  default:
114  return false;
115  }
116 }
117 
118 #include "AMDGPUGenCallingConv.inc"
119 
120 // Find a larger type to do a load / store of a vector with.
122  unsigned StoreSize = VT.getStoreSizeInBits();
123  if (StoreSize <= 32)
124  return EVT::getIntegerVT(Ctx, StoreSize);
125 
126  assert(StoreSize % 32 == 0 && "Store size not a multiple of 32");
127  return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
128 }
129 
131  EVT VT = Op.getValueType();
132  KnownBits Known = DAG.computeKnownBits(Op);
133  return VT.getSizeInBits() - Known.countMinLeadingZeros();
134 }
135 
137  EVT VT = Op.getValueType();
138 
139  // In order for this to be a signed 24-bit value, bit 23, must
140  // be a sign bit.
141  return VT.getSizeInBits() - DAG.ComputeNumSignBits(Op);
142 }
143 
145  const AMDGPUSubtarget &STI)
146  : TargetLowering(TM), Subtarget(&STI) {
147  // Lower floating point store/load to integer store/load to reduce the number
148  // of patterns in tablegen.
151 
154 
157 
160 
163 
166 
169 
172 
175 
176  // There are no 64-bit extloads. These should be done as a 32-bit extload and
177  // an extension to 64-bit.
178  for (MVT VT : MVT::integer_valuetypes()) {
182  }
183 
184  for (MVT VT : MVT::integer_valuetypes()) {
185  if (VT == MVT::i64)
186  continue;
187 
192 
197 
202  }
203 
204  for (MVT VT : MVT::integer_vector_valuetypes()) {
217  }
218 
223 
228 
233 
236 
239 
242 
245 
248 
251 
254 
257 
260 
265 
270 
275 
278 
281 
284 
287 
288 
293 
296 
297  // This is totally unsupported, just custom lower to produce an error.
299 
300  // Library functions. These default to Expand, but we have instructions
301  // for them.
312 
315 
319 
320 
323 
326 
327  // Expand to fneg + fadd.
329 
340 
344 
345  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
346  for (MVT VT : ScalarIntVTs) {
347  // These should use [SU]DIVREM, so set them to expand
352 
353  // GPU does not have divrem function for signed or unsigned.
356 
357  // GPU does not have [S|U]MUL_LOHI functions as a single instruction.
360 
364 
365  // AMDGPU uses ADDC/SUBC/ADDE/SUBE
370  }
371 
372  // The hardware supports 32-bit ROTR, but not ROTL.
374  setOperationAction(ISD::ROTL, MVT::i64, Expand);
375  setOperationAction(ISD::ROTR, MVT::i64, Expand);
376 
377  setOperationAction(ISD::MUL, MVT::i64, Expand);
385 
390 
391  setOperationAction(ISD::CTTZ, MVT::i64, Custom);
393  setOperationAction(ISD::CTLZ, MVT::i64, Custom);
395 
396  static const MVT::SimpleValueType VectorIntTypes[] = {
398  };
399 
400  for (MVT VT : VectorIntTypes) {
401  // Expand the following operations for the current type by default.
436  }
437 
438  static const MVT::SimpleValueType FloatVectorTypes[] = {
440  };
441 
442  for (MVT VT : FloatVectorTypes) {
473  }
474 
475  // This causes using an unrolled select operation rather than expansion with
476  // bit operations. This is in general better, but the alternative using BFI
477  // instructions may be better if the select sources are SGPRs.
480 
481  setOperationAction(ISD::SELECT, MVT::v4f32, Promote);
482  AddPromotedToType(ISD::SELECT, MVT::v4f32, MVT::v4i32);
483 
484  // There are no libcalls of any kind.
485  for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
486  setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
487 
490 
492  setJumpIsExpensive(true);
493 
494  // FIXME: This is only partially true. If we have to do vector compares, any
495  // SGPR pair can be a condition register. If we have a uniform condition, we
496  // are better off doing SALU operations, where there is only one SCC. For now,
497  // we don't have a way of knowing during instruction selection if a condition
498  // will be uniform and we always use vector compares. Assume we are using
499  // vector compares until that is fixed.
501 
503 
504  // We want to find all load dependencies for long chains of stores to enable
505  // merging into very wide vectors. The problem is with vectors with > 4
506  // elements. MergeConsecutiveStores will attempt to merge these because x8/x16
507  // vectors are a legal type, even though we have to split the loads
508  // usually. When we can more precisely specify load legality per address
509  // space, we should be able to make FindBetterChain/MergeConsecutiveStores
510  // smarter so that they can figure out what to do in 2 iterations without all
511  // N > 4 stores on the same chain.
513 
514  // memcpy/memmove/memset are expanded in the IR, so we shouldn't need to worry
515  // about these during lowering.
516  MaxStoresPerMemcpy = 0xffffffff;
517  MaxStoresPerMemmove = 0xffffffff;
518  MaxStoresPerMemset = 0xffffffff;
519 
537 }
538 
539 //===----------------------------------------------------------------------===//
540 // Target Information
541 //===----------------------------------------------------------------------===//
542 
544 static bool fnegFoldsIntoOp(unsigned Opc) {
545  switch (Opc) {
546  case ISD::FADD:
547  case ISD::FSUB:
548  case ISD::FMUL:
549  case ISD::FMA:
550  case ISD::FMAD:
551  case ISD::FMINNUM:
552  case ISD::FMAXNUM:
553  case ISD::FMINNUM_IEEE:
554  case ISD::FMAXNUM_IEEE:
555  case ISD::FSIN:
556  case ISD::FTRUNC:
557  case ISD::FRINT:
558  case ISD::FNEARBYINT:
559  case ISD::FCANONICALIZE:
560  case AMDGPUISD::RCP:
563  case AMDGPUISD::SIN_HW:
567  case AMDGPUISD::FMED3:
568  return true;
569  default:
570  return false;
571  }
572 }
573 
574 /// \p returns true if the operation will definitely need to use a 64-bit
575 /// encoding, and thus will use a VOP3 encoding regardless of the source
576 /// modifiers.
578 static bool opMustUseVOP3Encoding(const SDNode *N, MVT VT) {
579  return N->getNumOperands() > 2 || VT == MVT::f64;
580 }
581 
582 // Most FP instructions support source modifiers, but this could be refined
583 // slightly.
585 static bool hasSourceMods(const SDNode *N) {
586  if (isa<MemSDNode>(N))
587  return false;
588 
589  switch (N->getOpcode()) {
590  case ISD::CopyToReg:
591  case ISD::SELECT:
592  case ISD::FDIV:
593  case ISD::FREM:
594  case ISD::INLINEASM:
598 
599  // TODO: Should really be looking at the users of the bitcast. These are
600  // problematic because bitcasts are used to legalize all stores to integer
601  // types.
602  case ISD::BITCAST:
603  return false;
604  default:
605  return true;
606  }
607 }
608 
610  unsigned CostThreshold) {
611  // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
612  // it is truly free to use a source modifier in all cases. If there are
613  // multiple users but for each one will necessitate using VOP3, there will be
614  // a code size increase. Try to avoid increasing code size unless we know it
615  // will save on the instruction count.
616  unsigned NumMayIncreaseSize = 0;
617  MVT VT = N->getValueType(0).getScalarType().getSimpleVT();
618 
619  // XXX - Should this limit number of uses to check?
620  for (const SDNode *U : N->uses()) {
621  if (!hasSourceMods(U))
622  return false;
623 
624  if (!opMustUseVOP3Encoding(U, VT)) {
625  if (++NumMayIncreaseSize > CostThreshold)
626  return false;
627  }
628  }
629 
630  return true;
631 }
632 
634  return MVT::i32;
635 }
636 
638  return true;
639 }
640 
641 // The backend supports 32 and 64 bit floating point immediates.
642 // FIXME: Why are we reporting vectors of FP immediates as legal?
643 bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
644  EVT ScalarVT = VT.getScalarType();
645  return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64 ||
646  (ScalarVT == MVT::f16 && Subtarget->has16BitInsts()));
647 }
648 
649 // We don't want to shrink f64 / f32 constants.
651  EVT ScalarVT = VT.getScalarType();
652  return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
653 }
654 
656  ISD::LoadExtType ExtTy,
657  EVT NewVT) const {
658  // TODO: This may be worth removing. Check regression tests for diffs.
659  if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT))
660  return false;
661 
662  unsigned NewSize = NewVT.getStoreSizeInBits();
663 
664  // If we are reducing to a 32-bit load, this is always better.
665  if (NewSize == 32)
666  return true;
667 
668  EVT OldVT = N->getValueType(0);
669  unsigned OldSize = OldVT.getStoreSizeInBits();
670 
671  MemSDNode *MN = cast<MemSDNode>(N);
672  unsigned AS = MN->getAddressSpace();
673  // Do not shrink an aligned scalar load to sub-dword.
674  // Scalar engine cannot do sub-dword loads.
675  if (OldSize >= 32 && NewSize < 32 && MN->getAlignment() >= 4 &&
678  (isa<LoadSDNode>(N) &&
679  AS == AMDGPUAS::GLOBAL_ADDRESS && MN->isInvariant())) &&
681  return false;
682 
683  // Don't produce extloads from sub 32-bit types. SI doesn't have scalar
684  // extloads, so doing one requires using a buffer_load. In cases where we
685  // still couldn't use a scalar load, using the wider load shouldn't really
686  // hurt anything.
687 
688  // If the old size already had to be an extload, there's no harm in continuing
689  // to reduce the width.
690  return (OldSize < 32);
691 }
692 
694  EVT CastTy) const {
695 
696  assert(LoadTy.getSizeInBits() == CastTy.getSizeInBits());
697 
698  if (LoadTy.getScalarType() == MVT::i32)
699  return false;
700 
701  unsigned LScalarSize = LoadTy.getScalarSizeInBits();
702  unsigned CastScalarSize = CastTy.getScalarSizeInBits();
703 
704  return (LScalarSize < CastScalarSize) ||
705  (CastScalarSize >= 32);
706 }
707 
708 // SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also
709 // profitable with the expansion for 64-bit since it's generally good to
710 // speculate things.
711 // FIXME: These should really have the size as a parameter.
713  return true;
714 }
715 
717  return true;
718 }
719 
721  switch (N->getOpcode()) {
722  default:
723  return false;
724  case ISD::EntryToken:
725  case ISD::TokenFactor:
726  return true;
728  {
729  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
730  switch (IntrID) {
731  default:
732  return false;
735  return true;
736  }
737  }
738  break;
739  case ISD::LOAD:
740  {
741  const LoadSDNode * L = dyn_cast<LoadSDNode>(N);
742  if (L->getMemOperand()->getAddrSpace()
744  return true;
745  return false;
746  }
747  break;
748  }
749 }
750 
751 //===---------------------------------------------------------------------===//
752 // Target Properties
753 //===---------------------------------------------------------------------===//
754 
756  assert(VT.isFloatingPoint());
757 
758  // Packed operations do not have a fabs modifier.
759  return VT == MVT::f32 || VT == MVT::f64 ||
760  (Subtarget->has16BitInsts() && VT == MVT::f16);
761 }
762 
764  assert(VT.isFloatingPoint());
765  return VT == MVT::f32 || VT == MVT::f64 ||
766  (Subtarget->has16BitInsts() && VT == MVT::f16) ||
767  (Subtarget->hasVOP3PInsts() && VT == MVT::v2f16);
768 }
769 
771  unsigned NumElem,
772  unsigned AS) const {
773  return true;
774 }
775 
777  // There are few operations which truly have vector input operands. Any vector
778  // operation is going to involve operations on each component, and a
779  // build_vector will be a copy per element, so it always makes sense to use a
780  // build_vector input in place of the extracted element to avoid a copy into a
781  // super register.
782  //
783  // We should probably only do this if all users are extracts only, but this
784  // should be the common case.
785  return true;
786 }
787 
789  // Truncate is just accessing a subregister.
790 
791  unsigned SrcSize = Source.getSizeInBits();
792  unsigned DestSize = Dest.getSizeInBits();
793 
794  return DestSize < SrcSize && DestSize % 32 == 0 ;
795 }
796 
798  // Truncate is just accessing a subregister.
799 
800  unsigned SrcSize = Source->getScalarSizeInBits();
801  unsigned DestSize = Dest->getScalarSizeInBits();
802 
803  if (DestSize== 16 && Subtarget->has16BitInsts())
804  return SrcSize >= 32;
805 
806  return DestSize < SrcSize && DestSize % 32 == 0;
807 }
808 
809 bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
810  unsigned SrcSize = Src->getScalarSizeInBits();
811  unsigned DestSize = Dest->getScalarSizeInBits();
812 
813  if (SrcSize == 16 && Subtarget->has16BitInsts())
814  return DestSize >= 32;
815 
816  return SrcSize == 32 && DestSize == 64;
817 }
818 
819 bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
820  // Any register load of a 64-bit value really requires 2 32-bit moves. For all
821  // practical purposes, the extra mov 0 to load a 64-bit is free. As used,
822  // this will enable reducing 64-bit operations the 32-bit, which is always
823  // good.
824 
825  if (Src == MVT::i16)
826  return Dest == MVT::i32 ||Dest == MVT::i64 ;
827 
828  return Src == MVT::i32 && Dest == MVT::i64;
829 }
830 
832  return isZExtFree(Val.getValueType(), VT2);
833 }
834 
836  // There aren't really 64-bit registers, but pairs of 32-bit ones and only a
837  // limited number of native 64-bit operations. Shrinking an operation to fit
838  // in a single 32-bit register should always be helpful. As currently used,
839  // this is much less general than the name suggests, and is only used in
840  // places trying to reduce the sizes of loads. Shrinking loads to < 32-bits is
841  // not profitable, and may actually be harmful.
842  return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32;
843 }
844 
845 //===---------------------------------------------------------------------===//
846 // TargetLowering Callbacks
847 //===---------------------------------------------------------------------===//
848 
850  bool IsVarArg) {
851  switch (CC) {
854  llvm_unreachable("kernels should not be handled here");
862  return CC_AMDGPU;
863  case CallingConv::C:
864  case CallingConv::Fast:
865  case CallingConv::Cold:
866  return CC_AMDGPU_Func;
867  default:
868  report_fatal_error("Unsupported calling convention.");
869  }
870 }
871 
873  bool IsVarArg) {
874  switch (CC) {
877  llvm_unreachable("kernels should not be handled here");
885  return RetCC_SI_Shader;
886  case CallingConv::C:
887  case CallingConv::Fast:
888  case CallingConv::Cold:
889  return RetCC_AMDGPU_Func;
890  default:
891  report_fatal_error("Unsupported calling convention.");
892  }
893 }
894 
895 /// The SelectionDAGBuilder will automatically promote function arguments
896 /// with illegal types. However, this does not work for the AMDGPU targets
897 /// since the function arguments are stored in memory as these illegal types.
898 /// In order to handle this properly we need to get the original types sizes
899 /// from the LLVM IR Function and fix up the ISD::InputArg values before
900 /// passing them to AnalyzeFormalArguments()
901 
902 /// When the SelectionDAGBuilder computes the Ins, it takes care of splitting
903 /// input values across multiple registers. Each item in the Ins array
904 /// represents a single value that will be stored in registers. Ins[x].VT is
905 /// the value type of the value that will be stored in the register, so
906 /// whatever SDNode we lower the argument to needs to be this type.
907 ///
908 /// In order to correctly lower the arguments we need to know the size of each
909 /// argument. Since Ins[x].VT gives us the size of the register that will
910 /// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type
911 /// for the original function argument so that we can deduce the correct memory
912 /// type to use for Ins[x]. In most cases the correct memory type will be
913 /// Ins[x].ArgVT. However, this will not always be the case. If, for example,
914 /// we have a kernel argument of type v8i8, this argument will be split into
915 /// 8 parts and each part will be represented by its own item in the Ins array.
916 /// For each part the Ins[x].ArgVT will be the v8i8, which is the full type of
917 /// the argument before it was split. From this, we deduce that the memory type
918 /// for each individual part is i8. We pass the memory type as LocVT to the
919 /// calling convention analysis function and the register type (Ins[x].VT) as
920 /// the ValVT.
922  CCState &State,
923  const SmallVectorImpl<ISD::InputArg> &Ins) const {
924  const MachineFunction &MF = State.getMachineFunction();
925  const Function &Fn = MF.getFunction();
926  LLVMContext &Ctx = Fn.getParent()->getContext();
928  const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset(Fn);
930 
931  unsigned MaxAlign = 1;
932  uint64_t ExplicitArgOffset = 0;
933  const DataLayout &DL = Fn.getParent()->getDataLayout();
934 
935  unsigned InIndex = 0;
936 
937  for (const Argument &Arg : Fn.args()) {
938  Type *BaseArgTy = Arg.getType();
939  unsigned Align = DL.getABITypeAlignment(BaseArgTy);
940  MaxAlign = std::max(Align, MaxAlign);
941  unsigned AllocSize = DL.getTypeAllocSize(BaseArgTy);
942 
943  uint64_t ArgOffset = alignTo(ExplicitArgOffset, Align) + ExplicitOffset;
944  ExplicitArgOffset = alignTo(ExplicitArgOffset, Align) + AllocSize;
945 
946  // We're basically throwing away everything passed into us and starting over
947  // to get accurate in-memory offsets. The "PartOffset" is completely useless
948  // to us as computed in Ins.
949  //
950  // We also need to figure out what type legalization is trying to do to get
951  // the correct memory offsets.
952 
953  SmallVector<EVT, 16> ValueVTs;
955  ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets, ArgOffset);
956 
957  for (unsigned Value = 0, NumValues = ValueVTs.size();
958  Value != NumValues; ++Value) {
959  uint64_t BasePartOffset = Offsets[Value];
960 
961  EVT ArgVT = ValueVTs[Value];
962  EVT MemVT = ArgVT;
963  MVT RegisterVT = getRegisterTypeForCallingConv(Ctx, CC, ArgVT);
964  unsigned NumRegs = getNumRegistersForCallingConv(Ctx, CC, ArgVT);
965 
966  if (NumRegs == 1) {
967  // This argument is not split, so the IR type is the memory type.
968  if (ArgVT.isExtended()) {
969  // We have an extended type, like i24, so we should just use the
970  // register type.
971  MemVT = RegisterVT;
972  } else {
973  MemVT = ArgVT;
974  }
975  } else if (ArgVT.isVector() && RegisterVT.isVector() &&
976  ArgVT.getScalarType() == RegisterVT.getScalarType()) {
977  assert(ArgVT.getVectorNumElements() > RegisterVT.getVectorNumElements());
978  // We have a vector value which has been split into a vector with
979  // the same scalar type, but fewer elements. This should handle
980  // all the floating-point vector types.
981  MemVT = RegisterVT;
982  } else if (ArgVT.isVector() &&
983  ArgVT.getVectorNumElements() == NumRegs) {
984  // This arg has been split so that each element is stored in a separate
985  // register.
986  MemVT = ArgVT.getScalarType();
987  } else if (ArgVT.isExtended()) {
988  // We have an extended type, like i65.
989  MemVT = RegisterVT;
990  } else {
991  unsigned MemoryBits = ArgVT.getStoreSizeInBits() / NumRegs;
992  assert(ArgVT.getStoreSizeInBits() % NumRegs == 0);
993  if (RegisterVT.isInteger()) {
994  MemVT = EVT::getIntegerVT(State.getContext(), MemoryBits);
995  } else if (RegisterVT.isVector()) {
996  assert(!RegisterVT.getScalarType().isFloatingPoint());
997  unsigned NumElements = RegisterVT.getVectorNumElements();
998  assert(MemoryBits % NumElements == 0);
999  // This vector type has been split into another vector type with
1000  // a different elements size.
1001  EVT ScalarVT = EVT::getIntegerVT(State.getContext(),
1002  MemoryBits / NumElements);
1003  MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements);
1004  } else {
1005  llvm_unreachable("cannot deduce memory type.");
1006  }
1007  }
1008 
1009  // Convert one element vectors to scalar.
1010  if (MemVT.isVector() && MemVT.getVectorNumElements() == 1)
1011  MemVT = MemVT.getScalarType();
1012 
1013  if (MemVT.isExtended()) {
1014  // This should really only happen if we have vec3 arguments
1015  assert(MemVT.isVector() && MemVT.getVectorNumElements() == 3);
1016  MemVT = MemVT.getPow2VectorType(State.getContext());
1017  }
1018 
1019  unsigned PartOffset = 0;
1020  for (unsigned i = 0; i != NumRegs; ++i) {
1021  State.addLoc(CCValAssign::getCustomMem(InIndex++, RegisterVT,
1022  BasePartOffset + PartOffset,
1023  MemVT.getSimpleVT(),
1025  PartOffset += MemVT.getStoreSize();
1026  }
1027  }
1028  }
1029 }
1030 
1032  SDValue Chain, CallingConv::ID CallConv,
1033  bool isVarArg,
1034  const SmallVectorImpl<ISD::OutputArg> &Outs,
1035  const SmallVectorImpl<SDValue> &OutVals,
1036  const SDLoc &DL, SelectionDAG &DAG) const {
1037  // FIXME: Fails for r600 tests
1038  //assert(!isVarArg && Outs.empty() && OutVals.empty() &&
1039  // "wave terminate should not have return values");
1040  return DAG.getNode(AMDGPUISD::ENDPGM, DL, MVT::Other, Chain);
1041 }
1042 
1043 //===---------------------------------------------------------------------===//
1044 // Target specific lowering
1045 //===---------------------------------------------------------------------===//
1046 
1047 /// Selects the correct CCAssignFn for a given CallingConvention value.
1049  bool IsVarArg) {
1050  return AMDGPUCallLowering::CCAssignFnForCall(CC, IsVarArg);
1051 }
1052 
1054  bool IsVarArg) {
1055  return AMDGPUCallLowering::CCAssignFnForReturn(CC, IsVarArg);
1056 }
1057 
1059  SelectionDAG &DAG,
1060  MachineFrameInfo &MFI,
1061  int ClobberedFI) const {
1062  SmallVector<SDValue, 8> ArgChains;
1063  int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
1064  int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
1065 
1066  // Include the original chain at the beginning of the list. When this is
1067  // used by target LowerCall hooks, this helps legalize find the
1068  // CALLSEQ_BEGIN node.
1069  ArgChains.push_back(Chain);
1070 
1071  // Add a chain value for each stack argument load that overlaps the clobbered object.
1072  for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
1073  UE = DAG.getEntryNode().getNode()->use_end();
1074  U != UE; ++U) {
1075  if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) {
1076  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) {
1077  if (FI->getIndex() < 0) {
1078  int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
1079  int64_t InLastByte = InFirstByte;
1080  InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
1081 
1082  if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
1083  (FirstByte <= InFirstByte && InFirstByte <= LastByte))
1084  ArgChains.push_back(SDValue(L, 1));
1085  }
1086  }
1087  }
1088  }
1089 
1090  // Build a tokenfactor for all the chains.
1091  return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
1092 }
1093 
1095  SmallVectorImpl<SDValue> &InVals,
1096  StringRef Reason) const {
1097  SDValue Callee = CLI.Callee;
1098  SelectionDAG &DAG = CLI.DAG;
1099 
1100  const Function &Fn = DAG.getMachineFunction().getFunction();
1101 
1102  StringRef FuncName("<unknown>");
1103 
1104  if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee))
1105  FuncName = G->getSymbol();
1106  else if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1107  FuncName = G->getGlobal()->getName();
1108 
1109  DiagnosticInfoUnsupported NoCalls(
1110  Fn, Reason + FuncName, CLI.DL.getDebugLoc());
1111  DAG.getContext()->diagnose(NoCalls);
1112 
1113  if (!CLI.IsTailCall) {
1114  for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I)
1115  InVals.push_back(DAG.getUNDEF(CLI.Ins[I].VT));
1116  }
1117 
1118  return DAG.getEntryNode();
1119 }
1120 
1122  SmallVectorImpl<SDValue> &InVals) const {
1123  return lowerUnhandledCall(CLI, InVals, "unsupported call to function ");
1124 }
1125 
1127  SelectionDAG &DAG) const {
1128  const Function &Fn = DAG.getMachineFunction().getFunction();
1129 
1130  DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "unsupported dynamic alloca",
1131  SDLoc(Op).getDebugLoc());
1132  DAG.getContext()->diagnose(NoDynamicAlloca);
1133  auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
1134  return DAG.getMergeValues(Ops, SDLoc());
1135 }
1136 
1138  SelectionDAG &DAG) const {
1139  switch (Op.getOpcode()) {
1140  default:
1141  Op->print(errs(), &DAG);
1142  llvm_unreachable("Custom lowering code for this"
1143  "instruction is not implemented yet!");
1144  break;
1145  case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
1146  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
1147  case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
1148  case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
1149  case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
1150  case ISD::FREM: return LowerFREM(Op, DAG);
1151  case ISD::FCEIL: return LowerFCEIL(Op, DAG);
1152  case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
1153  case ISD::FRINT: return LowerFRINT(Op, DAG);
1154  case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
1155  case ISD::FROUND: return LowerFROUND(Op, DAG);
1156  case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
1157  case ISD::FLOG:
1158  return LowerFLOG(Op, DAG, 1 / AMDGPU_LOG2E_F);
1159  case ISD::FLOG10:
1160  return LowerFLOG(Op, DAG, AMDGPU_LN2_F / AMDGPU_LN10_F);
1161  case ISD::FEXP:
1162  return lowerFEXP(Op, DAG);
1163  case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
1164  case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
1165  case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
1166  case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
1167  case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
1168  case ISD::CTTZ:
1169  case ISD::CTTZ_ZERO_UNDEF:
1170  case ISD::CTLZ:
1171  case ISD::CTLZ_ZERO_UNDEF:
1172  return LowerCTLZ_CTTZ(Op, DAG);
1173  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
1174  }
1175  return Op;
1176 }
1177 
1180  SelectionDAG &DAG) const {
1181  switch (N->getOpcode()) {
1183  // Different parts of legalization seem to interpret which type of
1184  // sign_extend_inreg is the one to check for custom lowering. The extended
1185  // from type is what really matters, but some places check for custom
1186  // lowering of the result type. This results in trying to use
1187  // ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
1188  // nothing here and let the illegal result integer be handled normally.
1189  return;
1190  default:
1191  return;
1192  }
1193 }
1194 
1195 static bool hasDefinedInitializer(const GlobalValue *GV) {
1196  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
1197  if (!GVar || !GVar->hasInitializer())
1198  return false;
1199 
1200  return !isa<UndefValue>(GVar->getInitializer());
1201 }
1202 
1204  SDValue Op,
1205  SelectionDAG &DAG) const {
1206 
1207  const DataLayout &DL = DAG.getDataLayout();
1208  GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
1209  const GlobalValue *GV = G->getGlobal();
1210 
1213  if (!MFI->isEntryFunction()) {
1214  const Function &Fn = DAG.getMachineFunction().getFunction();
1215  DiagnosticInfoUnsupported BadLDSDecl(
1216  Fn, "local memory global used by non-kernel function", SDLoc(Op).getDebugLoc());
1217  DAG.getContext()->diagnose(BadLDSDecl);
1218  }
1219 
1220  // XXX: What does the value of G->getOffset() mean?
1221  assert(G->getOffset() == 0 &&
1222  "Do not know what to do with an non-zero offset");
1223 
1224  // TODO: We could emit code to handle the initialization somewhere.
1225  if (!hasDefinedInitializer(GV)) {
1226  unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
1227  return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
1228  }
1229  }
1230 
1231  const Function &Fn = DAG.getMachineFunction().getFunction();
1232  DiagnosticInfoUnsupported BadInit(
1233  Fn, "unsupported initializer for address space", SDLoc(Op).getDebugLoc());
1234  DAG.getContext()->diagnose(BadInit);
1235  return SDValue();
1236 }
1237 
1239  SelectionDAG &DAG) const {
1241 
1242  EVT VT = Op.getValueType();
1243  if (VT == MVT::v4i16 || VT == MVT::v4f16) {
1244  SDLoc SL(Op);
1245  SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Op.getOperand(0));
1246  SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Op.getOperand(1));
1247 
1248  SDValue BV = DAG.getBuildVector(MVT::v2i32, SL, { Lo, Hi });
1249  return DAG.getNode(ISD::BITCAST, SL, VT, BV);
1250  }
1251 
1252  for (const SDUse &U : Op->ops())
1253  DAG.ExtractVectorElements(U.get(), Args);
1254 
1255  return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args);
1256 }
1257 
// Fragment of a subvector-extraction lowering routine. Original lines 1258
// (function name and first parameter) and 1261 are absent from this listing.
// NOTE(review): presumably AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR, and
// line 1261 presumably declares the container `Args` used below - confirm.
//
// Operand 1 is read as a constant start index. VT.getVectorNumElements()
// elements of operand 0, beginning at that index, are extracted and re-emitted
// as a build_vector of the result type.
1259  SelectionDAG &DAG) const {
1260 
1262  unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1263  EVT VT = Op.getValueType();
1264  DAG.ExtractVectorElements(Op.getOperand(0), Args, Start,
1265  VT.getVectorNumElements());
1266 
1267  return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args);
1268 }
1269 
1270 /// Generate Min/Max node
// Tries to turn a compare-and-select pattern into AMDGPUISD::FMIN_LEGACY or
// AMDGPUISD::FMAX_LEGACY. LHS/RHS are the compare operands, True/False the
// select operands and CC the condition code node. Returns an empty SDValue
// when no combine applies.
// NOTE(review): original line 1271 (function name and leading parameters) is
// absent from this listing; `DL` and `VT` used below are presumably declared
// there - confirm.
1272  SDValue LHS, SDValue RHS,
1273  SDValue True, SDValue False,
1274  SDValue CC,
1275  DAGCombinerInfo &DCI) const {
// The select must choose between exactly the two compared values, in either
// order; anything else is not a min/max.
1276  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
1277  return SDValue();
1278 
1279  SelectionDAG &DAG = DCI.DAG;
1280  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1281  switch (CCOpcode) {
// Equality, constant and ordered/unordered-only predicates do not describe a
// min/max; break out to the empty return at the bottom.
1282  case ISD::SETOEQ:
1283  case ISD::SETONE:
1284  case ISD::SETUNE:
1285  case ISD::SETNE:
1286  case ISD::SETUEQ:
1287  case ISD::SETEQ:
1288  case ISD::SETFALSE:
1289  case ISD::SETFALSE2:
1290  case ISD::SETTRUE:
1291  case ISD::SETTRUE2:
1292  case ISD::SETUO:
1293  case ISD::SETO:
1294  break;
// Unordered less-than: note the operand order is the reverse of the ordered
// less-than case further down.
1295  case ISD::SETULE:
1296  case ISD::SETULT: {
1297  if (LHS == True)
1298  return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
1299  return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
1300  }
1301  case ISD::SETOLE:
1302  case ISD::SETOLT:
1303  case ISD::SETLE:
1304  case ISD::SETLT: {
1305  // Ordered. Assume ordered for undefined.
1306 
1307  // Only do this after legalization to avoid interfering with other combines
1308  // which might occur.
1309  if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
1310  !DCI.isCalledByLegalizer())
1311  return SDValue();
1312 
1313  // We need to permute the operands to get the correct NaN behavior. The
1314  // selected operand is the second one based on the failing compare with NaN,
1315  // so permute it based on the compare type the hardware uses.
1316  if (LHS == True)
1317  return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
1318  return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
1319  }
// Unordered greater-than: mirror image of the unordered less-than case.
1320  case ISD::SETUGE:
1321  case ISD::SETUGT: {
1322  if (LHS == True)
1323  return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
1324  return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
1325  }
// Ordered (or unspecified) greater-than: same legalization gate as the ordered
// less-than case above.
1326  case ISD::SETGT:
1327  case ISD::SETGE:
1328  case ISD::SETOGE:
1329  case ISD::SETOGT: {
1330  if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
1331  !DCI.isCalledByLegalizer())
1332  return SDValue();
1333 
1334  if (LHS == True)
1335  return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
1336  return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
1337  }
1338  case ISD::SETCC_INVALID:
1339  llvm_unreachable("Invalid setcc condcode!");
1340  }
1341  return SDValue();
1342 }
1343 
// Splits Op into two i32 values by bitcasting it to v2i32 and extracting
// element 0 (returned first, as Lo) and element 1 (returned second, as Hi).
// NOTE(review): original line 1345 (function name and parameters) is absent
// from this listing; presumably AMDGPUTargetLowering::split64BitValue - confirm.
1344 std::pair<SDValue, SDValue>
1346  SDLoc SL(Op);
1347 
1348  SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
1349 
1350  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
1351  const SDValue One = DAG.getConstant(1, SL, MVT::i32);
1352 
1353  SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
1354  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
1355 
1356  return std::make_pair(Lo, Hi);
1357 }
1358 
// Returns element 0 of Op viewed as v2i32, as an i32 node.
// NOTE(review): original line 1359 (function name and parameters) is absent
// from this listing; presumably AMDGPUTargetLowering::getLoHalf64 - confirm.
1360  SDLoc SL(Op);
1361 
1362  SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
1363  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
1364  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
1365 }
1366 
// Returns element 1 of Op viewed as v2i32, as an i32 node.
// NOTE(review): original line 1367 (function name and parameters) is absent
// from this listing; presumably AMDGPUTargetLowering::getHiHalf64 - confirm.
1368  SDLoc SL(Op);
1369 
1370  SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
1371  const SDValue One = DAG.getConstant(1, SL, MVT::i32);
1372  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
1373 }
1374 
// Splits a vector load into a low-part load and a high-part load, then returns
// the concatenated value together with a second merged result built from both
// loads' chain outputs.
// NOTE(review): original line 1375 (function name and first parameter) is
// absent from this listing; presumably AMDGPUTargetLowering::SplitVectorLoad -
// confirm.
1376  SelectionDAG &DAG) const {
1377  LoadSDNode *Load = cast<LoadSDNode>(Op);
1378  EVT VT = Op.getValueType();
1379 
1380 
1381  // If this is a 2 element vector, we really want to scalarize and not create
1382  // weird 1 element vectors.
1383  if (VT.getVectorNumElements() == 2)
1384  return scalarizeVectorLoad(Load, DAG);
1385 
1386  SDValue BasePtr = Load->getBasePtr();
1387  EVT MemVT = Load->getMemoryVT();
1388  SDLoc SL(Op);
1389 
1390  const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
1391 
1392  EVT LoVT, HiVT;
1393  EVT LoMemVT, HiMemVT;
1394  SDValue Lo, Hi;
1395 
// Derive the half types for both the result type and the in-memory type.
1396  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
1397  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
// NOTE(review): Lo and Hi are assigned here but not referenced again in the
// visible code; this SplitVector call looks unnecessary - confirm.
1398  std::tie(Lo, Hi) = DAG.SplitVector(Op, SL, LoVT, HiVT);
1399 
// The high half starts Size bytes past the base, so its alignment is limited
// by both the base alignment and that offset.
1400  unsigned Size = LoMemVT.getStoreSize();
1401  unsigned BaseAlign = Load->getAlignment();
1402  unsigned HiAlign = MinAlign(BaseAlign, Size);
1403 
// Both partial loads reuse the original chain, extension type and memory
// operand flags.
1404  SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
1405  Load->getChain(), BasePtr, SrcValue, LoMemVT,
1406  BaseAlign, Load->getMemOperand()->getFlags());
1407  SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, Size);
1408  SDValue HiLoad =
1409  DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(),
1410  HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()),
1411  HiMemVT, HiAlign, Load->getMemOperand()->getFlags());
1412 
// NOTE(review): original line 1415 is absent from this listing; it opens the
// second array element, which combines both loads' chain results (value #1) -
// presumably with a TokenFactor. Confirm.
1413  SDValue Ops[] = {
1414  DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad),
1416  LoLoad.getValue(1), HiLoad.getValue(1))
1417  };
1418 
1419  return DAG.getMergeValues(Ops, SL);
1420 }
1421 
// Splits a vector store into a low-part store and a high-part store and
// returns a TokenFactor joining the two.
// NOTE(review): original line 1422 (function name and first parameter) is
// absent from this listing; presumably AMDGPUTargetLowering::SplitVectorStore -
// confirm.
1423  SelectionDAG &DAG) const {
1424  StoreSDNode *Store = cast<StoreSDNode>(Op);
1425  SDValue Val = Store->getValue();
1426  EVT VT = Val.getValueType();
1427 
1428  // If this is a 2 element vector, we really want to scalarize and not create
1429  // weird 1 element vectors.
1430  if (VT.getVectorNumElements() == 2)
1431  return scalarizeVectorStore(Store, DAG);
1432 
1433  EVT MemVT = Store->getMemoryVT();
1434  SDValue Chain = Store->getChain();
1435  SDValue BasePtr = Store->getBasePtr();
1436  SDLoc SL(Op);
1437 
1438  EVT LoVT, HiVT;
1439  EVT LoMemVT, HiMemVT;
1440  SDValue Lo, Hi;
1441 
// Split the stored value and derive the matching in-memory half types.
1442  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
1443  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
1444  std::tie(Lo, Hi) = DAG.SplitVector(Val, SL, LoVT, HiVT);
1445 
// The high half is written immediately after the low half's store size.
1446  SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, LoMemVT.getStoreSize());
1447 
1448  const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo();
1449  unsigned BaseAlign = Store->getAlignment();
1450  unsigned Size = LoMemVT.getStoreSize();
// Alignment of the high half is limited by the base alignment and its offset.
1451  unsigned HiAlign = MinAlign(BaseAlign, Size);
1452 
// Both partial stores hang off the original chain and keep the original memory
// operand flags.
1453  SDValue LoStore =
1454  DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign,
1455  Store->getMemOperand()->getFlags());
1456  SDValue HiStore =
1457  DAG.getTruncStore(Chain, SL, Hi, HiPtr, SrcValue.getWithOffset(Size),
1458  HiMemVT, HiAlign, Store->getMemOperand()->getFlags());
1459 
1460  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
1461 }
1462 
1463 // This is a shortcut for integer division because we have fast i32<->f32
1464 // conversions, and fast f32 reciprocal instructions. The fractional part of a
1465 // float is enough to accurately represent up to a 24-bit signed integer.
// Returns the merged pair {quotient, remainder}, or an empty SDValue when
// either operand is not known to have at least 9 sign bits.
// NOTE(review): original line 1466 (function name and leading parameters) is
// absent from this listing. The callers visible further down invoke
// LowerDIVREM24(Op, DAG, <bool>) only for i32 - presumably this definition.
1467  bool Sign) const {
1468  SDLoc DL(Op);
1469  EVT VT = Op.getValueType();
1470  SDValue LHS = Op.getOperand(0);
1471  SDValue RHS = Op.getOperand(1);
1472  MVT IntVT = MVT::i32;
1473  MVT FltVT = MVT::f32;
1474 
// Both operands need at least 9 known sign bits, otherwise give up.
1475  unsigned LHSSignBits = DAG.ComputeNumSignBits(LHS);
1476  if (LHSSignBits < 9)
1477  return SDValue();
1478 
1479  unsigned RHSSignBits = DAG.ComputeNumSignBits(RHS);
1480  if (RHSSignBits < 9)
1481  return SDValue();
1482 
// DivBits is the width the results are truncated to at the end; the signed
// variant keeps one extra bit.
1483  unsigned BitSize = VT.getSizeInBits();
1484  unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
1485  unsigned DivBits = BitSize - SignBits;
1486  if (Sign)
1487  ++DivBits;
1488 
// NOTE(review): original lines 1489-1490 are absent from this listing; ToFp
// and ToInt used below are presumably defined there (chosen from Sign) -
// confirm.
1491 
// jq is the correction added to the truncated quotient: 1 for the unsigned
// case, and for the signed case a value derived from the sign of LHS ^ RHS.
1492  SDValue jq = DAG.getConstant(1, DL, IntVT);
1493 
1494  if (Sign) {
1495  // char|short jq = ia ^ ib;
1496  jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
1497 
1498  // jq = jq >> (bitsize - 2)
1499  jq = DAG.getNode(ISD::SRA, DL, VT, jq,
1500  DAG.getConstant(BitSize - 2, DL, VT));
1501 
1502  // jq = jq | 0x1
1503  jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, DL, VT));
1504  }
1505 
1506  // int ia = (int)LHS;
1507  SDValue ia = LHS;
1508 
1509  // int ib = (int)RHS;
1510  SDValue ib = RHS;
1511 
1512  // float fa = (float)ia;
1513  SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia);
1514 
1515  // float fb = (float)ib;
1516  SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
1517 
// float fq = fa * rcp(fb);
1518  SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
1519  fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
1520 
1521  // fq = trunc(fq);
1522  fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq);
1523 
1524  // float fqneg = -fq;
1525  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
1526 
1527  // float fr = mad(fqneg, fb, fa);
// NOTE(review): original line 1529 is absent from this listing; it holds the
// opcode selected when FP32 denormals are enabled. ISD::FMAD is used otherwise.
1528  unsigned OpCode = Subtarget->hasFP32Denormals() ?
1530  (unsigned)ISD::FMAD;
1531  SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa);
1532 
1533  // int iq = (int)fq;
1534  SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
1535 
1536  // fr = fabs(fr);
1537  fr = DAG.getNode(ISD::FABS, DL, FltVT, fr);
1538 
1539  // fb = fabs(fb);
1540  fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);
1541 
1542  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
1543 
1544  // int cv = fr >= fb;
1545  SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
1546 
1547  // jq = (cv ? jq : 0);
1548  jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, DL, VT));
1549 
1550  // dst = iq + jq;
1551  SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq);
1552 
1553  // Rem needs compensation, it's easier to recompute it
// Rem = LHS - Div * RHS
1554  SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS);
1555  Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem);
1556 
1557  // Truncate to number of bits this divide really is.
// Signed: sign-extend in register from DivBits. Unsigned: mask to DivBits.
1558  if (Sign) {
1559  SDValue InRegSize
1560  = DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), DivBits));
1561  Div = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Div, InRegSize);
1562  Rem = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Rem, InRegSize);
1563  } else {
1564  SDValue TruncMask = DAG.getConstant((UINT64_C(1) << DivBits) - 1, DL, VT);
1565  Div = DAG.getNode(ISD::AND, DL, VT, Div, TruncMask);
1566  Rem = DAG.getNode(ISD::AND, DL, VT, Rem, TruncMask);
1567  }
1568 
1569  return DAG.getMergeValues({ Div, Rem }, DL);
1570 }
1571 
// Expands a 64-bit unsigned divide/remainder. The quotient is appended to
// Results first and the remainder second. Three strategies are visible:
//   1. both operands have zero upper 32 bits -> one 32-bit UDIVREM;
//   2. i64 is a legal type -> reciprocal estimate refined with integer math,
//      followed by up to two conditional corrections;
//   3. otherwise -> bit-by-bit long division over the low 32 bits.
// NOTE(review): original lines 1572 and 1574 (function name, first parameter
// and the `Results` parameter) are absent from this listing. The assert
// message below names this routine LowerUDIVREM64.
1573  SelectionDAG &DAG,
1575  SDLoc DL(Op);
1576  EVT VT = Op.getValueType();
1577 
1578  assert(VT == MVT::i64 && "LowerUDIVREM64 expects an i64");
1579 
1580  EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
1581 
1582  SDValue One = DAG.getConstant(1, DL, HalfVT);
1583  SDValue Zero = DAG.getConstant(0, DL, HalfVT);
1584 
1585  // HiLo split
1586  SDValue LHS = Op.getOperand(0);
1587  SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero);
1588  SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, One);
1589 
1590  SDValue RHS = Op.getOperand(1);
1591  SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero);
1592  SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, One);
1593 
// Strategy 1: upper 32 bits of both operands are known zero, so a half-width
// UDIVREM on the low halves suffices; results are widened with a zero high half.
1594  if (DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) &&
1595  DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) {
1596 
1597  SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
1598  LHS_Lo, RHS_Lo);
1599 
1600  SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(0), Zero});
1601  SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(1), Zero});
1602 
1603  Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV));
1604  Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM));
1605  return;
1606  }
1607 
// Strategy 2: taken when i64 is a legal type.
1608  if (isTypeLegal(MVT::i64)) {
1609  // Compute denominator reciprocal.
// NOTE(review): original line 1611 is absent from this listing; it holds the
// opcode selected when FP32 denormals are enabled. ISD::FMAD is used otherwise.
1610  unsigned FMAD = Subtarget->hasFP32Denormals() ?
1612  (unsigned)ISD::FMAD;
1613 
// Float bit patterns used below, decoded as IEEE-754 binary32:
//   0x4f800000 = 2^32, 0x5f7ffffc = just below 2^64,
//   0x2f800000 = 2^-32, 0xcf800000 = -2^32.
// Mad1 = float(RHS_Hi) * 2^32 + float(RHS_Lo), i.e. RHS as a float.
1614  SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo);
1615  SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi);
1616  SDValue Mad1 = DAG.getNode(FMAD, DL, MVT::f32, Cvt_Hi,
1617  DAG.getConstantFP(APInt(32, 0x4f800000).bitsToFloat(), DL, MVT::f32),
1618  Cvt_Lo);
1619  SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, DL, MVT::f32, Mad1);
1620  SDValue Mul1 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Rcp,
1621  DAG.getConstantFP(APInt(32, 0x5f7ffffc).bitsToFloat(), DL, MVT::f32));
// Split the scaled reciprocal into a high word (Trunc) and a low word (Mad2).
1622  SDValue Mul2 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Mul1,
1623  DAG.getConstantFP(APInt(32, 0x2f800000).bitsToFloat(), DL, MVT::f32));
1624  SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, MVT::f32, Mul2);
1625  SDValue Mad2 = DAG.getNode(FMAD, DL, MVT::f32, Trunc,
1626  DAG.getConstantFP(APInt(32, 0xcf800000).bitsToFloat(), DL, MVT::f32),
1627  Mul1);
1628  SDValue Rcp_Lo = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Mad2);
1629  SDValue Rcp_Hi = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Trunc);
1630  SDValue Rcp64 = DAG.getBitcast(VT,
1631  DAG.getBuildVector(MVT::v2i32, DL, {Rcp_Lo, Rcp_Hi}));
1632 
1633  SDValue Zero64 = DAG.getConstant(0, DL, VT);
1634  SDValue One64 = DAG.getConstant(1, DL, VT);
1635  SDValue Zero1 = DAG.getConstant(0, DL, MVT::i1);
1636  SDVTList HalfCarryVT = DAG.getVTList(HalfVT, MVT::i1);
1637 
// First refinement: Add1 = Rcp64 + mulhu(Rcp64, -RHS * Rcp64), with the 64-bit
// add carried out on 32-bit halves.
1638  SDValue Neg_RHS = DAG.getNode(ISD::SUB, DL, VT, Zero64, RHS);
1639  SDValue Mullo1 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Rcp64);
1640  SDValue Mulhi1 = DAG.getNode(ISD::MULHU, DL, VT, Rcp64, Mullo1);
1641  SDValue Mulhi1_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1,
1642  Zero);
1643  SDValue Mulhi1_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1,
1644  One);
1645 
1646  SDValue Add1_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Lo,
1647  Mulhi1_Lo, Zero1);
1648  SDValue Add1_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Hi,
1649  Mulhi1_Hi, Add1_Lo.getValue(1));
// Add1_HiNc is the same high-half sum without the carry-in; the carry from
// Add1_Lo is applied later, in Add2_HiC.
1650  SDValue Add1_HiNc = DAG.getNode(ISD::ADD, DL, HalfVT, Rcp_Hi, Mulhi1_Hi);
1651  SDValue Add1 = DAG.getBitcast(VT,
1652  DAG.getBuildVector(MVT::v2i32, DL, {Add1_Lo, Add1_Hi}));
1653 
// Second refinement, same shape as the first but starting from Add1.
1654  SDValue Mullo2 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Add1);
1655  SDValue Mulhi2 = DAG.getNode(ISD::MULHU, DL, VT, Add1, Mullo2);
1656  SDValue Mulhi2_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2,
1657  Zero);
1658  SDValue Mulhi2_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2,
1659  One);
1660 
1661  SDValue Add2_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_Lo,
1662  Mulhi2_Lo, Zero1);
1663  SDValue Add2_HiC = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_HiNc,
1664  Mulhi2_Hi, Add1_Lo.getValue(1));
1665  SDValue Add2_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add2_HiC,
1666  Zero, Add2_Lo.getValue(1));
1667  SDValue Add2 = DAG.getBitcast(VT,
1668  DAG.getBuildVector(MVT::v2i32, DL, {Add2_Lo, Add2_Hi}));
// Quotient estimate: Mulhi3 = mulhu(LHS, Add2).
1669  SDValue Mulhi3 = DAG.getNode(ISD::MULHU, DL, VT, LHS, Add2);
1670 
// Remainder estimate: Sub1 = LHS - RHS * Mulhi3, computed on 32-bit halves.
1671  SDValue Mul3 = DAG.getNode(ISD::MUL, DL, VT, RHS, Mulhi3);
1672 
1673  SDValue Mul3_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mul3, Zero);
1674  SDValue Mul3_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mul3, One);
1675  SDValue Sub1_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, LHS_Lo,
1676  Mul3_Lo, Zero1);
1677  SDValue Sub1_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, LHS_Hi,
1678  Mul3_Hi, Sub1_Lo.getValue(1));
// Sub1_Mi is the high-half difference without the borrow-in; it feeds the
// Sub2 chain below.
1679  SDValue Sub1_Mi = DAG.getNode(ISD::SUB, DL, HalfVT, LHS_Hi, Mul3_Hi);
1680  SDValue Sub1 = DAG.getBitcast(VT,
1681  DAG.getBuildVector(MVT::v2i32, DL, {Sub1_Lo, Sub1_Hi}));
1682 
// C3 is an all-ones/zero mask for "Sub1 >= RHS" assembled from halves: when
// the high halves are equal the low-half compare (C2) decides, otherwise the
// high-half compare (C1) does.
1683  SDValue MinusOne = DAG.getConstant(0xffffffffu, DL, HalfVT);
1684  SDValue C1 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, MinusOne, Zero,
1685  ISD::SETUGE);
1686  SDValue C2 = DAG.getSelectCC(DL, Sub1_Lo, RHS_Lo, MinusOne, Zero,
1687  ISD::SETUGE);
1688  SDValue C3 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, C2, C1, ISD::SETEQ);
1689 
1690  // TODO: Here and below portions of the code can be enclosed into if/endif.
1691  // Currently control flow is unconditional and we have 4 selects after
1692  // potential endif to substitute PHIs.
1693 
1694  // if C3 != 0 ...
// First correction candidate: Sub2 = Sub1 - RHS, Add3 = Mulhi3 + 1.
1695  SDValue Sub2_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub1_Lo,
1696  RHS_Lo, Zero1);
1697  SDValue Sub2_Mi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub1_Mi,
1698  RHS_Hi, Sub1_Lo.getValue(1));
1699  SDValue Sub2_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Mi,
1700  Zero, Sub2_Lo.getValue(1));
1701  SDValue Sub2 = DAG.getBitcast(VT,
1702  DAG.getBuildVector(MVT::v2i32, DL, {Sub2_Lo, Sub2_Hi}));
1703 
1704  SDValue Add3 = DAG.getNode(ISD::ADD, DL, VT, Mulhi3, One64);
1705 
// C6 is the same style of mask as C3, for "Sub2 >= RHS".
1706  SDValue C4 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, MinusOne, Zero,
1707  ISD::SETUGE);
1708  SDValue C5 = DAG.getSelectCC(DL, Sub2_Lo, RHS_Lo, MinusOne, Zero,
1709  ISD::SETUGE);
1710  SDValue C6 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, C5, C4, ISD::SETEQ);
1711 
1712  // if (C6 != 0)
// Second correction candidate: Sub3 = Sub2 - RHS, Add4 = Add3 + 1.
1713  SDValue Add4 = DAG.getNode(ISD::ADD, DL, VT, Add3, One64);
1714 
1715  SDValue Sub3_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Lo,
1716  RHS_Lo, Zero1);
1717  SDValue Sub3_Mi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Mi,
1718  RHS_Hi, Sub2_Lo.getValue(1));
1719  SDValue Sub3_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub3_Mi,
1720  Zero, Sub3_Lo.getValue(1));
1721  SDValue Sub3 = DAG.getBitcast(VT,
1722  DAG.getBuildVector(MVT::v2i32, DL, {Sub3_Lo, Sub3_Hi}));
1723 
1724  // endif C6
1725  // endif C3
1726 
// Pick the corrected or uncorrected values: C6 chooses between the first and
// second correction, C3 between any correction and the raw estimate.
1727  SDValue Sel1 = DAG.getSelectCC(DL, C6, Zero, Add4, Add3, ISD::SETNE);
1728  SDValue Div = DAG.getSelectCC(DL, C3, Zero, Sel1, Mulhi3, ISD::SETNE);
1729 
1730  SDValue Sel2 = DAG.getSelectCC(DL, C6, Zero, Sub3, Sub2, ISD::SETNE);
1731  SDValue Rem = DAG.getSelectCC(DL, C3, Zero, Sel2, Sub1, ISD::SETNE);
1732 
1733  Results.push_back(Div);
1734  Results.push_back(Rem);
1735 
1736  return;
1737  }
1738 
1739  // r600 expansion.
1740  // Get Speculative values
1741  SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
1742  SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
1743 
// When RHS_Hi == 0 the speculative 32-bit results for the high word are used;
// otherwise the running remainder starts as LHS_Hi and the high quotient
// word is zero.
1744  SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, Zero, REM_Part, LHS_Hi, ISD::SETEQ);
1745  SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {REM_Lo, Zero});
1746  REM = DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM);
1747 
1748  SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, Zero, DIV_Part, Zero, ISD::SETEQ);
1749  SDValue DIV_Lo = Zero;
1750 
1751  const unsigned halfBitWidth = HalfVT.getSizeInBits();
1752 
// Long division over the bits of LHS_Lo, most significant bit first. Each
// iteration shifts the next bit into REM, sets the matching quotient bit when
// REM >= RHS, and subtracts RHS from REM in that case.
1753  for (unsigned i = 0; i < halfBitWidth; ++i) {
1754  const unsigned bitPos = halfBitWidth - i - 1;
1755  SDValue POS = DAG.getConstant(bitPos, DL, HalfVT);
1756  // Get value of high bit
1757  SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
1758  HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, One);
1759  HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit);
1760 
1761  // Shift
1762  REM = DAG.getNode(ISD::SHL, DL, VT, REM, DAG.getConstant(1, DL, VT));
1763  // Add LHS high bit
1764  REM = DAG.getNode(ISD::OR, DL, VT, REM, HBit);
1765 
1766  SDValue BIT = DAG.getConstant(1ULL << bitPos, DL, HalfVT);
1767  SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, Zero, ISD::SETUGE);
1768 
1769  DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
1770 
1771  // Update REM
1772  SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
1773  REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE);
1774  }
1775 
1776  SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {DIV_Lo, DIV_Hi});
1777  DIV = DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV);
1778  Results.push_back(DIV);
1779  Results.push_back(REM);
1780 }
1781 
// Lowers an unsigned divide/remainder node and returns the merged pair
// {Div, Rem}. i64 is delegated to LowerUDIVREM64; i32 first tries the 24-bit
// float shortcut (LowerDIVREM24); everything else uses the URECIP-based
// expansion below.
// NOTE(review): original lines 1782 (function name and first parameter) and
// 1788 are absent from this listing; line 1788 presumably declares the
// `Results` container used in the i64 branch - confirm.
1783  SelectionDAG &DAG) const {
1784  SDLoc DL(Op);
1785  EVT VT = Op.getValueType();
1786 
1787  if (VT == MVT::i64) {
1789  LowerUDIVREM64(Op, DAG, Results);
1790  return DAG.getMergeValues(Results, DL);
1791  }
1792 
// An empty result from LowerDIVREM24 means the shortcut did not apply.
1793  if (VT == MVT::i32) {
1794  if (SDValue Res = LowerDIVREM24(Op, DAG, false))
1795  return Res;
1796  }
1797 
1798  SDValue Num = Op.getOperand(0);
1799  SDValue Den = Op.getOperand(1);
1800 
1801  // RCP = URECIP(Den) = 2^32 / Den + e
1802  // e is rounding error.
1803  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
1804 
1805  // RCP_LO = mul(RCP, Den)
1806  SDValue RCP_LO = DAG.getNode(ISD::MUL, DL, VT, RCP, Den);
1807 
1808  // RCP_HI = mulhu (RCP, Den)
1809  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
1810 
1811  // NEG_RCP_LO = -RCP_LO
1812  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
1813  RCP_LO);
1814 
1815  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
1816  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
1817  NEG_RCP_LO, RCP_LO,
1818  ISD::SETEQ);
1819  // Calculate the rounding error from the URECIP instruction
1820  // E = mulhu(ABS_RCP_LO, RCP)
1821  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
1822 
1823  // RCP_A_E = RCP + E
1824  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
1825 
1826  // RCP_S_E = RCP - E
1827  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
1828 
1829  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
1830  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
1831  RCP_A_E, RCP_S_E,
1832  ISD::SETEQ);
1833  // Quotient = mulhu(Tmp0, Num)
1834  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
1835 
1836  // Num_S_Remainder = Quotient * Den
1837  SDValue Num_S_Remainder = DAG.getNode(ISD::MUL, DL, VT, Quotient, Den);
1838 
1839  // Remainder = Num - Num_S_Remainder
1840  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
1841 
1842  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
1843  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
1844  DAG.getConstant(-1, DL, VT),
1845  DAG.getConstant(0, DL, VT),
1846  ISD::SETUGE);
1847  // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
1848  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
1849  Num_S_Remainder,
1850  DAG.getConstant(-1, DL, VT),
1851  DAG.getConstant(0, DL, VT),
1852  ISD::SETUGE);
1853  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
1854  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
1855  Remainder_GE_Zero);
1856 
1857  // Calculate Division result:
1858 
1859  // Quotient_A_One = Quotient + 1
1860  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
1861  DAG.getConstant(1, DL, VT));
1862 
1863  // Quotient_S_One = Quotient - 1
1864  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
1865  DAG.getConstant(1, DL, VT));
1866 
1867  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
1868  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT),
1869  Quotient, Quotient_A_One, ISD::SETEQ);
1870 
1871  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
1872  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT),
1873  Quotient_S_One, Div, ISD::SETEQ);
1874 
1875  // Calculate Rem result:
1876 
1877  // Remainder_S_Den = Remainder - Den
1878  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
1879 
1880  // Remainder_A_Den = Remainder + Den
1881  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
1882 
1883  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
1884  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT),
1885  Remainder, Remainder_S_Den, ISD::SETEQ);
1886 
1887  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
1888  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT),
1889  Remainder_A_Den, Rem, ISD::SETEQ);
1890  SDValue Ops[2] = {
1891  Div,
1892  Rem
1893  };
1894  return DAG.getMergeValues(Ops, DL);
1895 }
1896 
// Lowers a signed divide/remainder node and returns the merged pair
// {Div, Rem}. i32 first tries the 24-bit float shortcut; i64 operands with
// more than 32 sign bits are narrowed to a 32-bit SDIVREM; everything else is
// reduced to an unsigned UDIVREM on the absolute values with the signs
// reapplied afterwards.
// NOTE(review): original line 1897 (function name and first parameter) is
// absent from this listing; presumably AMDGPUTargetLowering::LowerSDIVREM -
// confirm.
1898  SelectionDAG &DAG) const {
1899  SDLoc DL(Op);
1900  EVT VT = Op.getValueType();
1901 
1902  SDValue LHS = Op.getOperand(0);
1903  SDValue RHS = Op.getOperand(1);
1904 
1905  SDValue Zero = DAG.getConstant(0, DL, VT);
1906  SDValue NegOne = DAG.getConstant(-1, DL, VT);
1907 
// An empty result from LowerDIVREM24 means the shortcut did not apply.
1908  if (VT == MVT::i32) {
1909  if (SDValue Res = LowerDIVREM24(Op, DAG, true))
1910  return Res;
1911  }
1912 
// Both i64 operands have more than 32 sign bits, so the low halves carry the
// full value: divide at half width and sign-extend both results.
1913  if (VT == MVT::i64 &&
1914  DAG.ComputeNumSignBits(LHS) > 32 &&
1915  DAG.ComputeNumSignBits(RHS) > 32) {
1916  EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
1917 
1918  //HiLo split
1919  SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero);
1920  SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero);
1921  SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
1922  LHS_Lo, RHS_Lo);
1923  SDValue Res[2] = {
1924  DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)),
1925  DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1))
1926  };
1927  return DAG.getMergeValues(Res, DL);
1928  }
1929 
// Sign masks: -1 when the operand is negative, 0 otherwise. The quotient's
// sign mask is the XOR of the two.
1930  SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
1931  SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
1932  SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
1933  SDValue RSign = LHSign; // Remainder sign is the same as LHS
1934 
// Absolute value via the sign mask: (x + sign) ^ sign.
1935  LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign);
1936  RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign);
1937 
1938  LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign);
1939  RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign);
1940 
// Unsigned divide of the magnitudes; result #0 is the quotient, #1 the
// remainder.
1941  SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
1942  SDValue Rem = Div.getValue(1);
1943 
// Conditional negate via the sign mask: (x ^ sign) - sign.
1944  Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign);
1945  Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign);
1946 
1947  Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign);
1948  Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign);
1949 
1950  SDValue Res[2] = {
1951  Div,
1952  Rem
1953  };
1954  return DAG.getMergeValues(Res, DL);
1955 }
1956 
1957 // (frem x, y) -> (fsub x, (fmul (ftrunc (fdiv x, y)), y))
// NOTE(review): original line 1958 (function name and parameters) is absent
// from this listing; presumably AMDGPUTargetLowering::LowerFREM - confirm.
1959  SDLoc SL(Op);
1960  EVT VT = Op.getValueType();
1961  SDValue X = Op.getOperand(0);
1962  SDValue Y = Op.getOperand(1);
1963 
1964  // TODO: Should this propagate fast-math-flags?
1965 
1966  SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y);
// NOTE(review): despite its name, `Floor` holds the FTRUNC (round toward
// zero) of the quotient, matching the formula above.
1967  SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div);
1968  SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y);
1969 
1970  return DAG.getNode(ISD::FSUB, SL, VT, X, Mul);
1971 }
1972 
// f64 ceiling built from FTRUNC: truncate, then add 1.0 when the source is
// positive and not already equal to its truncation.
// NOTE(review): original line 1973 (function name and parameters) is absent
// from this listing; presumably AMDGPUTargetLowering::LowerFCEIL - confirm.
1974  SDLoc SL(Op);
1975  SDValue Src = Op.getOperand(0);
1976 
1977  // result = trunc(src)
1978  // if (src > 0.0 && src != result)
1979  // result += 1.0
1980 
1981  SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
1982 
1983  const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
1984  const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
1985 
// NOTE(review): original line 1987 (the initializer of SetCCVT) is absent
// from this listing.
1986  EVT SetCCVT =
1988 
// NOTE(review): despite its name, `Lt0` holds the ordered "Src > 0" compare
// (SETOGT), as the pseudo-code above requires.
1989  SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT);
1990  SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
1991  SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
1992 
// Add is 1.0 when both conditions hold and 0.0 otherwise.
1993  SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
1994  // TODO: Should this propagate fast-math-flags?
1995  return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
1996 }
1997 
// Extracts an 11-bit field starting at bit 20 (52 - 32) of Hi with an
// unsigned bitfield extract and subtracts 1023 from it, returning the result
// as an i32 node.
// NOTE(review): original line 1998 (function name and leading parameters) is
// absent from this listing. Callers further down invoke
// extractF64Exponent(Hi, SL, DAG) with the upper 32 bits of an f64, so this is
// presumably that helper and the result the unbiased exponent - confirm.
1999  SelectionDAG &DAG) {
2000  const unsigned FractBits = 52;
2001  const unsigned ExpBits = 11;
2002 
2003  SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
2004  Hi,
2005  DAG.getConstant(FractBits - 32, SL, MVT::i32),
2006  DAG.getConstant(ExpBits, SL, MVT::i32));
2007  SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
2008  DAG.getConstant(1023, SL, MVT::i32));
2009 
2010  return Exp;
2011 }
2012 
// f64 truncation done with integer operations on the bit pattern:
//   Exp < 0   -> only the sign bit is kept (signed zero);
//   Exp > 51  -> the source is returned unchanged;
//   otherwise -> the fraction bits selected by (FractMask >> Exp) are cleared.
// NOTE(review): original line 2013 (function name and parameters) is absent
// from this listing; presumably AMDGPUTargetLowering::LowerFTRUNC - confirm.
2014  SDLoc SL(Op);
2015  SDValue Src = Op.getOperand(0);
2016 
2017  assert(Op.getValueType() == MVT::f64);
2018 
2019  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
2020  const SDValue One = DAG.getConstant(1, SL, MVT::i32);
2021 
2022  SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
2023 
2024  // Extract the upper half, since this is where we will find the sign and
2025  // exponent.
2026  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One);
2027 
2028  SDValue Exp = extractF64Exponent(Hi, SL, DAG);
2029 
2030  const unsigned FractBits = 52;
2031 
2032  // Extract the sign bit.
2033  const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, SL, MVT::i32);
2034  SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
2035 
2036  // Extend back to 64-bits.
2037  SDValue SignBit64 = DAG.getBuildVector(MVT::v2i32, SL, {Zero, SignBit});
2038  SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);
2039 
2040  SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
2041  const SDValue FractMask
2042  = DAG.getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64);
2043 
// Tmp0 = BcInt & ~(FractMask >> Exp): clears the bits still covered by the
// shifted fraction mask.
2044  SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
2045  SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
2046  SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
2047 
// NOTE(review): original line 2049 (the initializer of SetCCVT) is absent
// from this listing.
2048  EVT SetCCVT =
2050 
2051  const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32);
2052 
2053  SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
2054  SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
2055 
// The Exp > 51 select is applied last, so it takes priority.
2056  SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0);
2057  SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1);
2058 
2059  return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
2060 }
2061 
// f64 round-to-integer using the add/subtract-2^52 technique: a copy of 2^52
// carrying Src's sign is added to Src and then subtracted again. When
// |Src| > 0x1.fffffffffffffp+51 the source is returned unchanged instead.
// NOTE(review): original line 2062 (function name and parameters) is absent
// from this listing; presumably AMDGPUTargetLowering::LowerFRINT - confirm.
2063  SDLoc SL(Op);
2064  SDValue Src = Op.getOperand(0);
2065 
2066  assert(Op.getValueType() == MVT::f64);
2067 
// C1 = 2^52, given the sign of Src.
2068  APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
2069  SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
2070  SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
2071 
2072  // TODO: Should this propagate fast-math-flags?
2073 
// Tmp2 = (Src + CopySign) - CopySign
2074  SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
2075  SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
2076 
2077  SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
2078 
// C2 is the threshold above which the source is passed through as-is.
2079  APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");
2080  SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64);
2081 
// NOTE(review): original line 2083 (the initializer of SetCCVT) is absent
// from this listing.
2082  EVT SetCCVT =
2084  SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
2085 
2086  return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
2087 }
2088 
// Re-emits the operation as ISD::FRINT on the same operand and result type.
// NOTE(review): original line 2089 (function name and parameters) is absent
// from this listing; presumably AMDGPUTargetLowering::LowerFNEARBYINT -
// confirm.
2090  // FNEARBYINT and FRINT are the same, except in their handling of FP
2091  // exceptions. Those aren't really meaningful for us, and OpenCL only has
2092  // rint, so just treat them as equivalent.
2093  return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0));
2094 }
2095 
2096 // XXX - May require not supporting f32 denormals?
2097 
2098 // Don't handle v2f16. The extra instructions to scalarize and repack around the
2099 // compare and vselect end up producing worse code than scalarizing the whole
2100 // operation.
// Rounds X by truncating it and then adding copysign(1.0, X) when the
// discarded fraction has magnitude >= 0.5, or 0.0 otherwise.
// NOTE(review): original line 2101 (function name and parameters) is absent
// from this listing. A dispatcher further down calls LowerFROUND32_16(Op, DAG)
// for f32 and f16 - presumably this definition.
2102  SDLoc SL(Op);
2103  SDValue X = Op.getOperand(0);
2104  EVT VT = Op.getValueType();
2105 
2106  SDValue T = DAG.getNode(ISD::FTRUNC, SL, VT, X);
2107 
2108  // TODO: Should this propagate fast-math-flags?
2109 
// Diff = X - trunc(X) is the fractional part that truncation dropped.
2110  SDValue Diff = DAG.getNode(ISD::FSUB, SL, VT, X, T);
2111 
2112  SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, VT, Diff);
2113 
2114  const SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
2115  const SDValue One = DAG.getConstantFP(1.0, SL, VT);
2116  const SDValue Half = DAG.getConstantFP(0.5, SL, VT);
2117 
// SignOne = 1.0 with the sign of X.
2118  SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, VT, One, X);
2119 
2120  EVT SetCCVT =
2121  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
2122 
2123  SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
2124 
2125  SDValue Sel = DAG.getNode(ISD::SELECT, SL, VT, Cmp, SignOne, Zero);
2126 
2127  return DAG.getNode(ISD::FADD, SL, VT, T, Sel);
2128 }
2129 
// f64 rounding done with integer operations on the bit pattern:
//   Exp < 0   -> copysign(Exp == -1 ? 1.0 : 0.0, X);
//   Exp > 51  -> X unchanged;
//   otherwise -> add D to the bits when any masked fraction bit is set, then
//                clear the masked fraction bits.
// NOTE(review): original line 2130 (function name and parameters) is absent
// from this listing. A dispatcher further down calls LowerFROUND64(Op, DAG)
// for f64 - presumably this definition.
2131  SDLoc SL(Op);
2132  SDValue X = Op.getOperand(0);
2133 
2134  SDValue L = DAG.getNode(ISD::BITCAST, SL, MVT::i64, X);
2135 
2136  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
2137  const SDValue One = DAG.getConstant(1, SL, MVT::i32);
2138  const SDValue NegOne = DAG.getConstant(-1, SL, MVT::i32);
2139  const SDValue FiftyOne = DAG.getConstant(51, SL, MVT::i32);
// NOTE(review): original line 2141 (the initializer of SetCCVT) is absent
// from this listing.
2140  EVT SetCCVT =
2142 
2143  SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
2144 
// Hi is element 1 of the v2i32 view; the exponent is extracted from it.
2145  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC, One);
2146 
2147  SDValue Exp = extractF64Exponent(Hi, SL, DAG);
2148 
// Mask covers the low 52 bits (the f64 fraction field).
2149  const SDValue Mask = DAG.getConstant(INT64_C(0x000fffffffffffff), SL,
2150  MVT::i64);
2151 
// M = Mask >> Exp selects the fraction bits that remain below the integer
// part. D = (bit 51) >> Exp is presumably the bit worth one half at this
// exponent - TODO confirm.
2152  SDValue M = DAG.getNode(ISD::SRA, SL, MVT::i64, Mask, Exp);
2153  SDValue D = DAG.getNode(ISD::SRA, SL, MVT::i64,
2154  DAG.getConstant(INT64_C(0x0008000000000000), SL,
2155  MVT::i64),
2156  Exp);
2157 
// Tmp1 is true when any of the masked fraction bits is set.
2158  SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, L, M);
2159  SDValue Tmp1 = DAG.getSetCC(SL, SetCCVT,
2160  DAG.getConstant(0, SL, MVT::i64), Tmp0,
2161  ISD::SETNE);
2162 
// K = (L + (Tmp1 ? D : 0)) & ~M, reinterpreted as f64.
2163  SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, Tmp1,
2164  D, DAG.getConstant(0, SL, MVT::i64));
2165  SDValue K = DAG.getNode(ISD::ADD, SL, MVT::i64, L, Tmp2);
2166 
2167  K = DAG.getNode(ISD::AND, SL, MVT::i64, K, DAG.getNOT(SL, M, MVT::i64));
2168  K = DAG.getNode(ISD::BITCAST, SL, MVT::f64, K);
2169 
2170  SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
2171  SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
2172  SDValue ExpEqNegOne = DAG.getSetCC(SL, SetCCVT, NegOne, Exp, ISD::SETEQ);
2173 
// Result magnitude for Exp < 0: 1.0 when Exp == -1, otherwise 0.0.
2174  SDValue Mag = DAG.getNode(ISD::SELECT, SL, MVT::f64,
2175  ExpEqNegOne,
2176  DAG.getConstantFP(1.0, SL, MVT::f64),
2177  DAG.getConstantFP(0.0, SL, MVT::f64));
2178 
2179  SDValue S = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, Mag, X);
2180 
// The Exp > 51 select is applied last, so it takes priority.
2181  K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpLt0, S, K);
2182  K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpGt51, X, K);
2183 
2184  return K;
2185 }
2186 
// Dispatches on the result type: f32 and f16 share one lowering, f64 has its
// own, and any other type is treated as unreachable.
2188  EVT VT = Op.getValueType();
2189 
2190  if (VT == MVT::f32 || VT == MVT::f16)
2191  return LowerFROUND32_16(Op, DAG);
2192 
2193  if (VT == MVT::f64)
2194  return LowerFROUND64(Op, DAG);
2195 
2196  llvm_unreachable("unhandled type");
2197 }
2198 
// f64 floor built from FTRUNC plus a conditional -1.0 correction.
2200  SDLoc SL(Op);
2201  SDValue Src = Op.getOperand(0);
2202 
2203  // result = trunc(src);
2204  // if (src < 0.0 && src != result)
2205  // result += -1.0.
2206 
2207  SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
2208 
2209  const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
2210  const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64);
2211 
2212  EVT SetCCVT =
2214 
  // Both compares are ordered, so a NaN input makes the condition false and
  // 0.0 is added to Trunc.
2215  SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT);
2216  SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
2217  SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
2218 
  // Add is -1.0 only for negative, non-integral inputs.
2219  SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
2220  // TODO: Should this propagate fast-math-flags?
2221  return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
2222 }
2223 
2225  double Log2BaseInverted) const {
  // Emits FLOG2(operand) * Log2BaseInverted in the node's own value type.
  // The caller chooses the scale constant (and therefore the logarithm base).
2226  EVT VT = Op.getValueType();
2227 
2228  SDLoc SL(Op);
2229  SDValue Operand = Op.getOperand(0);
2230  SDValue Log2Operand = DAG.getNode(ISD::FLOG2, SL, VT, Operand);
2231  SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT);
2232 
  // No node flags are forwarded to either new node.
2233  return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand);
2234 }
2235 
2236 // Return M_LOG2E of appropriate type
2237 static SDValue getLog2EVal(SelectionDAG &DAG, const SDLoc &SL, EVT VT) {
2238  switch (VT.getScalarType().getSimpleVT().SimpleTy) {
2239  case MVT::f32:
2240  return DAG.getConstantFP(1.44269504088896340735992468100189214f, SL, VT);
2241  case MVT::f16:
2242  return DAG.getConstantFP(
2243  APFloat(APFloat::IEEEhalf(), "1.44269504088896340735992468100189214"),
2244  SL, VT);
2245  case MVT::f64:
2246  return DAG.getConstantFP(
2247  APFloat(APFloat::IEEEdouble(), "0x1.71547652b82fep+0"), SL, VT);
2248  default:
2249  llvm_unreachable("unsupported fp type");
2250  }
2251 }
2252 
2253 // exp2(M_LOG2E_F * f);
// The body below rewrites the node as FEXP2(Src * log2(e)), with the constant
// taken from getLog2EVal so that it matches the node's value type.
2255  EVT VT = Op.getValueType();
2256  SDLoc SL(Op);
2257  SDValue Src = Op.getOperand(0);
2258 
2259  const SDValue K = getLog2EVal(DAG, SL, VT);
  // The original node's flags are forwarded to both the FMUL and the FEXP2.
2260  SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Src, K, Op->getFlags());
2261  return DAG.getNode(ISD::FEXP2, SL, VT, Mul, Op->getFlags());
2262 }
2263 
2264 static bool isCtlzOpc(unsigned Opc) {
2265  return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
2266 }
2267 
2268 static bool isCttzOpc(unsigned Opc) {
2269  return Opc == ISD::CTTZ || Opc == ISD::CTTZ_ZERO_UNDEF;
2270 }
2271 
// Lowers CTLZ/CTTZ (and their *_ZERO_UNDEF forms). A zero-undef i32 source
// maps directly onto a single target node; everything else is handled as a
// 64-bit value split into two 32-bit halves, and the final result is
// zero-extended to i64.
2273  SDLoc SL(Op);
2274  SDValue Src = Op.getOperand(0);
2275  bool ZeroUndef = Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF ||
2277 
  // ISDOpc is the generic zero-undef opcode applied to each 32-bit half;
  // NewOpc is the target opcode used for the direct i32 case.
2278  unsigned ISDOpc, NewOpc;
2279  if (isCtlzOpc(Op.getOpcode())) {
2280  ISDOpc = ISD::CTLZ_ZERO_UNDEF;
2281  NewOpc = AMDGPUISD::FFBH_U32;
2282  } else if (isCttzOpc(Op.getOpcode())) {
2283  ISDOpc = ISD::CTTZ_ZERO_UNDEF;
2284  NewOpc = AMDGPUISD::FFBL_B32;
2285  } else
2286  llvm_unreachable("Unexpected OPCode!!!");
2287 
2288 
2289  if (ZeroUndef && Src.getValueType() == MVT::i32)
2290  return DAG.getNode(NewOpc, SL, MVT::i32, Src);
2291 
  // NOTE(review): from here on Src is bitcast to v2i32, so it is presumably
  // always i64 at this point - confirm against the callers/legalization setup.
2292  SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
2293 
2294  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
2295  const SDValue One = DAG.getConstant(1, SL, MVT::i32);
2296 
2297  SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
2298  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
2299 
2300  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(),
2301  *DAG.getContext(), MVT::i32);
2302 
  // The half that is scanned first: Hi for ctlz, Lo for cttz. If it is zero
  // the count continues into the other half.
2303  SDValue HiOrLo = isCtlzOpc(Op.getOpcode()) ? Hi : Lo;
2304  SDValue Hi0orLo0 = DAG.getSetCC(SL, SetCCVT, HiOrLo, Zero, ISD::SETEQ);
2305 
2306  SDValue OprLo = DAG.getNode(ISDOpc, SL, MVT::i32, Lo);
2307  SDValue OprHi = DAG.getNode(ISDOpc, SL, MVT::i32, Hi);
2308 
2309  const SDValue Bits32 = DAG.getConstant(32, SL, MVT::i32);
2310  SDValue Add, NewOpr;
2311  if (isCtlzOpc(Op.getOpcode())) {
2312  Add = DAG.getNode(ISD::ADD, SL, MVT::i32, OprLo, Bits32);
2313  // ctlz(x) = hi_32(x) == 0 ? ctlz(lo_32(x)) + 32 : ctlz(hi_32(x))
2314  NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0orLo0, Add, OprHi);
2315  } else {
2316  Add = DAG.getNode(ISD::ADD, SL, MVT::i32, OprHi, Bits32);
2317  // cttz(x) = lo_32(x) == 0 ? cttz(hi_32(x)) + 32 : cttz(lo_32(x))
2318  NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0orLo0, Add, OprLo);
2319  }
2320 
2321  if (!ZeroUndef) {
2322  // Test if the full 64-bit input is zero.
2323 
2324  // FIXME: DAG combines turn what should be an s_and_b64 into a v_or_b32,
2325  // which we probably don't want.
2326  SDValue LoOrHi = isCtlzOpc(Op.getOpcode()) ? Lo : Hi;
2327  SDValue Lo0OrHi0 = DAG.getSetCC(SL, SetCCVT, LoOrHi, Zero, ISD::SETEQ);
2328  SDValue SrcIsZero = DAG.getNode(ISD::AND, SL, SetCCVT, Lo0OrHi0, Hi0orLo0);
2329 
2330  // TODO: If i64 setcc is half rate, it can result in 1 fewer instruction
2331  // with the same cycles, otherwise it is slower.
2332  // SDValue SrcIsZero = DAG.getSetCC(SL, SetCCVT, Src,
2333  // DAG.getConstant(0, SL, MVT::i64), ISD::SETEQ);
2334 
  // NOTE(review): this declaration shadows the outer Bits32 (value 32) and
  // actually holds 64; a name such as Bits64 would be less misleading.
2335  const SDValue Bits32 = DAG.getConstant(64, SL, MVT::i32);
2336 
2337  // The instruction returns -1 for 0 input, but the defined intrinsic
2338  // behavior is to return the number of bits.
2339  NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32,
2340  SrcIsZero, Bits32, NewOpr);
2341  }
2342 
2343  return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewOpr);
2344 }
2345 
2347  bool Signed) const {
  // Builds an f32 bit pattern from a 64-bit integer using integer operations
  // only, following the reference pseudo-code below. For the signed case the
  // magnitude is converted first and the result is negated at the end.
2348  // Unsigned
2349  // cul2f(ulong u)
2350  //{
2351  // uint lz = clz(u);
2352  // uint e = (u != 0) ? 127U + 63U - lz : 0;
2353  // u = (u << lz) & 0x7fffffffffffffffUL;
2354  // ulong t = u & 0xffffffffffUL;
2355  // uint v = (e << 23) | (uint)(u >> 40);
2356  // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
2357  // return as_float(v + r);
2358  //}
2359  // Signed
2360  // cl2f(long l)
2361  //{
2362  // long s = l >> 63;
2363  // float r = cul2f((l + s) ^ s);
2364  // return s ? -r : r;
2365  //}
2366 
2367  SDLoc SL(Op);
2368  SDValue Src = Op.getOperand(0);
2369  SDValue L = Src;
2370 
  // S is only assigned in the signed case: all ones for a negative input,
  // zero otherwise. (L + S) ^ S then yields the magnitude of L.
2371  SDValue S;
2372  if (Signed) {
2373  const SDValue SignBit = DAG.getConstant(63, SL, MVT::i64);
2374  S = DAG.getNode(ISD::SRA, SL, MVT::i64, L, SignBit);
2375 
2376  SDValue LPlusS = DAG.getNode(ISD::ADD, SL, MVT::i64, L, S);
2377  L = DAG.getNode(ISD::XOR, SL, MVT::i64, LPlusS, S);
2378  }
2379 
  // NOTE(review): SetCCVT is derived from f32 but is used for the i64
  // compares below and for the final select condition - presumably all of
  // these map to the same condition type on this target; confirm.
2380  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(),
2381  *DAG.getContext(), MVT::f32);
2382 
2383 
2384  SDValue ZeroI32 = DAG.getConstant(0, SL, MVT::i32);
2385  SDValue ZeroI64 = DAG.getConstant(0, SL, MVT::i64);
2386  SDValue LZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i64, L);
2387  LZ = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LZ);
2388 
  // E: biased exponent, forced to 0 when the (magnitude of the) input is 0.
2389  SDValue K = DAG.getConstant(127U + 63U, SL, MVT::i32);
2390  SDValue E = DAG.getSelect(SL, MVT::i32,
2391  DAG.getSetCC(SL, SetCCVT, L, ZeroI64, ISD::SETNE),
2392  DAG.getNode(ISD::SUB, SL, MVT::i32, K, LZ),
2393  ZeroI32);
2394 
  // U: input normalized so its leading one sits in bit 63, with that bit
  // then masked off.
2395  SDValue U = DAG.getNode(ISD::AND, SL, MVT::i64,
2396  DAG.getNode(ISD::SHL, SL, MVT::i64, L, LZ),
2397  DAG.getConstant((-1ULL) >> 1, SL, MVT::i64));
2398 
  // T: the low 40 bits of U, which do not fit in the result and only drive
  // rounding.
2399  SDValue T = DAG.getNode(ISD::AND, SL, MVT::i64, U,
2400  DAG.getConstant(0xffffffffffULL, SL, MVT::i64));
2401 
  // NOTE(review): despite its name, UShl is a logical right shift by 40.
2402  SDValue UShl = DAG.getNode(ISD::SRL, SL, MVT::i64,
2403  U, DAG.getConstant(40, SL, MVT::i64));
2404 
  // V: exponent in bits 23 and up, OR'ed with the kept upper bits of U.
2405  SDValue V = DAG.getNode(ISD::OR, SL, MVT::i32,
2406  DAG.getNode(ISD::SHL, SL, MVT::i32, E, DAG.getConstant(23, SL, MVT::i32)),
2407  DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, UShl));
2408 
  // Rounding increment R: 1 if T is above the halfway value, the low bit of
  // V if T is exactly halfway (ties to even), 0 otherwise.
2409  SDValue C = DAG.getConstant(0x8000000000ULL, SL, MVT::i64);
2410  SDValue RCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETUGT);
2411  SDValue TCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETEQ);
2412 
2413  SDValue One = DAG.getConstant(1, SL, MVT::i32);
2414 
2415  SDValue VTrunc1 = DAG.getNode(ISD::AND, SL, MVT::i32, V, One);
2416 
2417  SDValue R = DAG.getSelect(SL, MVT::i32,
2418  RCmp,
2419  One,
2420  DAG.getSelect(SL, MVT::i32, TCmp, VTrunc1, ZeroI32));
2421  R = DAG.getNode(ISD::ADD, SL, MVT::i32, V, R);
2422  R = DAG.getNode(ISD::BITCAST, SL, MVT::f32, R);
2423 
2424  if (!Signed)
2425  return R;
2426 
  // Signed: S (all ones or zero) is narrowed to the condition type and
  // selects between the negated and the plain result.
2427  SDValue RNeg = DAG.getNode(ISD::FNEG, SL, MVT::f32, R);
2428  return DAG.getSelect(SL, MVT::f32, DAG.getSExtOrTrunc(S, SL, SetCCVT), RNeg, R);
2429 }
2430 
2432  bool Signed) const {
  // Converts a 64-bit integer to f64 by converting its two 32-bit halves
  // separately and recombining them as ldexp(fp(Hi), 32) + fp(Lo).
2433  SDLoc SL(Op);
2434  SDValue Src = Op.getOperand(0);
2435 
2436  SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
2437 
  // NOTE(review): the first lines of the Lo and Hi definitions are missing
  // from this listing; the surviving operands (index 0 and index 1) suggest
  // they extract elements 0 and 1 of BC - confirm against the full source.
2439  DAG.getConstant(0, SL, MVT::i32));
2441  DAG.getConstant(1, SL, MVT::i32));
2442 
  // Only the high half carries the sign, so only it uses the signed
  // conversion; the low half is always converted as unsigned.
2443  SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP,
2444  SL, MVT::f64, Hi);
2445 
2446  SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo);
2447 
2448  SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
2449  DAG.getConstant(32, SL, MVT::i32));
2450  // TODO: Should this propagate fast-math-flags?
2451  return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
2452 }
2453 
2455  SelectionDAG &DAG) const {
  // Unsigned i64 -> floating-point lowering. Asserts an i64 source, then
  // dispatches on the destination type (f16 via f32, f32, or f64).
2456  assert(Op.getOperand(0).getValueType() == MVT::i64 &&
2457  "operation should be legal");
2458 
2459  // TODO: Factor out code common with LowerSINT_TO_FP.
2460 
2461  EVT DestVT = Op.getValueType();
2462  if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {
2463  SDLoc DL(Op);
2464  SDValue Src = Op.getOperand(0);
2465 
  // f16 result: convert to f32 with the same opcode, then round to f16.
2466  SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src);
2467  SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SDLoc(Op));
2468  SDValue FPRound =
2469  DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag);
2470 
2471  return FPRound;
2472  }
2473 
  // The final argument (false) selects the unsigned variant of the helper.
2474  if (DestVT == MVT::f32)
2475  return LowerINT_TO_FP32(Op, DAG, false);
2476 
2477  assert(DestVT == MVT::f64);
2478  return LowerINT_TO_FP64(Op, DAG, false);
2479 }
2480 
2482  SelectionDAG &DAG) const {
  // Signed i64 -> floating-point lowering. Asserts an i64 source, then
  // dispatches on the destination type (f16 via f32, f32, or f64).
2483  assert(Op.getOperand(0).getValueType() == MVT::i64 &&
2484  "operation should be legal");
2485 
2486  // TODO: Factor out code common with LowerUINT_TO_FP.
2487 
2488  EVT DestVT = Op.getValueType();
2489  if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {
2490  SDLoc DL(Op);
2491  SDValue Src = Op.getOperand(0);
2492 
  // f16 result: convert to f32 with the same opcode, then round to f16.
2493  SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src);
2494  SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SDLoc(Op));
2495  SDValue FPRound =
2496  DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag);
2497 
2498  return FPRound;
2499  }
2500 
  // The final argument (true) selects the signed variant of the helper.
2501  if (DestVT == MVT::f32)
2502  return LowerINT_TO_FP32(Op, DAG, true);
2503 
2504  assert(DestVT == MVT::f64);
2505  return LowerINT_TO_FP64(Op, DAG, true);
2506 }
2507 
2509  bool Signed) const {
  // f64 -> 64-bit integer conversion, produced as two 32-bit halves:
  //   hi = floor(trunc(src) * 2^-32)
  //   lo = trunc(src) - hi * 2^32   (computed with a single FMA)
2510  SDLoc SL(Op);
2511 
2512  SDValue Src = Op.getOperand(0);
2513 
2514  SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
2515 
  // K0 is the bit pattern of 2^-32 and K1 the bit pattern of -(2^32).
2516  SDValue K0 = DAG.getConstantFP(BitsToDouble(UINT64_C(0x3df0000000000000)), SL,
2517  MVT::f64);
2518  SDValue K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), SL,
2519  MVT::f64);
2520  // TODO: Should this propagate fast-math-flags?
2521  SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0);
2522 
2523  SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul);
2524 
2525 
2526  SDValue Fma = DAG.getNode(ISD::FMA, SL, MVT::f64, FloorMul, K1, Trunc);
2527 
  // Only the high half carries the sign, so only it uses the signed
  // conversion; the low half is always converted as unsigned.
2528  SDValue Hi = DAG.getNode(Signed ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, SL,
2529  MVT::i32, FloorMul);
2530  SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma);
2531 
  // Element 0 of the vector is the low half, element 1 the high half.
2532  SDValue Result = DAG.getBuildVector(MVT::v2i32, SL, {Lo, Hi});
2533 
2534  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Result);
2535 }
2536 
// FP_TO_FP16 lowering. An f32 source becomes a single target node; the rest
// of the body is an integer-only f64 -> f16 conversion.
2538  SDLoc DL(Op);
2539  SDValue N0 = Op.getOperand(0);
2540 
2541  // Convert to target node to get known bits
2542  if (N0.getValueType() == MVT::f32)
2543  return DAG.getNode(AMDGPUISD::FP_TO_FP16, DL, Op.getValueType(), N0);
2544 
  // NOTE(review): the condition guarding the early return below (original
  // line 2545) and original line 2550 are missing from this listing.
2546  // There is a generic expand for FP_TO_FP16 with unsafe fast math.
2547  return SDValue();
2548  }
2549 
2551 
2552  // f64 -> f16 conversion using round-to-nearest-even rounding mode.
2553  const unsigned ExpMask = 0x7ff;
2554  const unsigned ExpBiasf64 = 1023;
2555  const unsigned ExpBiasf16 = 15;
2556  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
2557  SDValue One = DAG.getConstant(1, DL, MVT::i32);
  // Split the f64 bit pattern: UH is the high dword, U becomes the low dword.
2558  SDValue U = DAG.getNode(ISD::BITCAST, DL, MVT::i64, N0);
2559  SDValue UH = DAG.getNode(ISD::SRL, DL, MVT::i64, U,
2560  DAG.getConstant(32, DL, MVT::i64));
2561  UH = DAG.getZExtOrTrunc(UH, DL, MVT::i32);
2562  U = DAG.getZExtOrTrunc(U, DL, MVT::i32);
  // NOTE(review): this shift amount is created as i64 although the shifted
  // value is i32 and every other i32 shift in this function uses an i32
  // amount - probably should be MVT::i32 for consistency; confirm.
2563  SDValue E = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
2564  DAG.getConstant(20, DL, MVT::i64));
2565  E = DAG.getNode(ISD::AND, DL, MVT::i32, E,
2566  DAG.getConstant(ExpMask, DL, MVT::i32));
2567  // Subtract the fp64 exponent bias (1023) to get the real exponent and
2568  // add the f16 bias (15) to get the biased exponent for the f16 format.
2569  E = DAG.getNode(ISD::ADD, DL, MVT::i32, E,
2570  DAG.getConstant(-ExpBiasf64 + ExpBiasf16, DL, MVT::i32));
2571 
  // M: the upper mantissa bits taken from the high dword, with bit 0 left
  // clear so it can hold the sticky bit computed below.
2572  SDValue M = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
2573  DAG.getConstant(8, DL, MVT::i32));
2574  M = DAG.getNode(ISD::AND, DL, MVT::i32, M,
2575  DAG.getConstant(0xffe, DL, MVT::i32));
2576 
  // Sticky bit: 1 if any remaining lower mantissa bit (low 9 bits of the
  // high dword or any bit of the low dword) is set.
2577  SDValue MaskedSig = DAG.getNode(ISD::AND, DL, MVT::i32, UH,
2578  DAG.getConstant(0x1ff, DL, MVT::i32));
2579  MaskedSig = DAG.getNode(ISD::OR, DL, MVT::i32, MaskedSig, U);
2580 
2581  SDValue Lo40Set = DAG.getSelectCC(DL, MaskedSig, Zero, Zero, One, ISD::SETEQ);
2582  M = DAG.getNode(ISD::OR, DL, MVT::i32, M, Lo40Set);
2583 
  // I is the value used further down when E == 1039, i.e. when the source
  // exponent field was all ones (0x7ff - 1023 + 15 == 1039).
2584  // (M != 0 ? 0x0200 : 0) | 0x7c00;
2585  SDValue I = DAG.getNode(ISD::OR, DL, MVT::i32,
2586  DAG.getSelectCC(DL, M, Zero, DAG.getConstant(0x0200, DL, MVT::i32),
2587  Zero, ISD::SETNE), DAG.getConstant(0x7c00, DL, MVT::i32));
2588 
2589  // N = M | (E << 12);
2590  SDValue N = DAG.getNode(ISD::OR, DL, MVT::i32, M,
2591  DAG.getNode(ISD::SHL, DL, MVT::i32, E,
2592  DAG.getConstant(12, DL, MVT::i32)));
2593 
2594  // B = clamp(1-E, 0, 13);
2595  SDValue OneSubExp = DAG.getNode(ISD::SUB, DL, MVT::i32,
2596  One, E);
2597  SDValue B = DAG.getNode(ISD::SMAX, DL, MVT::i32, OneSubExp, Zero);
2598  B = DAG.getNode(ISD::SMIN, DL, MVT::i32, B,
2599  DAG.getConstant(13, DL, MVT::i32));
2600 
  // D: mantissa with bit 12 set (0x1000), shifted right by B. D1 records
  // whether that shift dropped any set bits and is OR'ed back in as a sticky
  // bit. D is used in place of N when E < 1.
2601  SDValue SigSetHigh = DAG.getNode(ISD::OR, DL, MVT::i32, M,
2602  DAG.getConstant(0x1000, DL, MVT::i32));
2603 
2604  SDValue D = DAG.getNode(ISD::SRL, DL, MVT::i32, SigSetHigh, B);
2605  SDValue D0 = DAG.getNode(ISD::SHL, DL, MVT::i32, D, B);
2606  SDValue D1 = DAG.getSelectCC(DL, D0, SigSetHigh, One, Zero, ISD::SETNE);
2607  D = DAG.getNode(ISD::OR, DL, MVT::i32, D, D1);
2608 
  // Rounding: the low three bits of V decide the increment added after V is
  // shifted right by two (1 when they equal 3 or exceed 5, otherwise 0).
2609  SDValue V = DAG.getSelectCC(DL, E, One, D, N, ISD::SETLT);
2610  SDValue VLow3 = DAG.getNode(ISD::AND, DL, MVT::i32, V,
2611  DAG.getConstant(0x7, DL, MVT::i32));
2612  V = DAG.getNode(ISD::SRL, DL, MVT::i32, V,
2613  DAG.getConstant(2, DL, MVT::i32));
2614  SDValue V0 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(3, DL, MVT::i32),
2615  One, Zero, ISD::SETEQ);
2616  SDValue V1 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(5, DL, MVT::i32),
2617  One, Zero, ISD::SETGT);
2618  V1 = DAG.getNode(ISD::OR, DL, MVT::i32, V0, V1);
2619  V = DAG.getNode(ISD::ADD, DL, MVT::i32, V, V1);
2620 
  // E > 30 forces the result to 0x7c00; E == 1039 then overrides that with I.
2621  V = DAG.getSelectCC(DL, E, DAG.getConstant(30, DL, MVT::i32),
2622  DAG.getConstant(0x7c00, DL, MVT::i32), V, ISD::SETGT);
2623  V = DAG.getSelectCC(DL, E, DAG.getConstant(1039, DL, MVT::i32),
2624  I, V, ISD::SETEQ);
2625 
2626  // Extract the sign bit.
2627  SDValue Sign = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
2628  DAG.getConstant(16, DL, MVT::i32));
2629  Sign = DAG.getNode(ISD::AND, DL, MVT::i32, Sign,
2630  DAG.getConstant(0x8000, DL, MVT::i32));
2631 
2632  V = DAG.getNode(ISD::OR, DL, MVT::i32, Sign, V);
2633  return DAG.getZExtOrTrunc(V, DL, Op.getValueType());
2634 }
2635 
2637  SelectionDAG &DAG) const {
  // Custom lowering for floating-point -> signed integer. Handles an f16
  // source (when 16-bit instructions exist) and f64 -> i64; for everything
  // else an empty SDValue is returned.
2638  SDValue Src = Op.getOperand(0);
2639 
2640  // TODO: Factor out code common with LowerFP_TO_UINT.
2641 
2642  EVT SrcVT = Src.getValueType();
2643  if (Subtarget->has16BitInsts() && SrcVT == MVT::f16) {
2644  SDLoc DL(Op);
2645 
  // Extend to f32 first, then redo the same conversion from f32.
  // NOTE(review): the local is named FpToInt32 but the node is created with
  // result type i64.
2646  SDValue FPExtend = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
2647  SDValue FpToInt32 =
2648  DAG.getNode(Op.getOpcode(), DL, MVT::i64, FPExtend);
2649 
2650  return FpToInt32;
2651  }
2652 
  // The final argument (true) selects the signed variant of the helper.
2653  if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
2654  return LowerFP64_TO_INT(Op, DAG, true);
2655 
2656  return SDValue();
2657 }
2658 
2660  SelectionDAG &DAG) const {
  // Custom lowering for floating-point -> unsigned integer. Handles an f16
  // source (when 16-bit instructions exist) and f64 -> i64; for everything
  // else an empty SDValue is returned.
2661  SDValue Src = Op.getOperand(0);
2662 
2663  // TODO: Factor out code common with LowerFP_TO_SINT.
2664 
2665  EVT SrcVT = Src.getValueType();
2666  if (Subtarget->has16BitInsts() && SrcVT == MVT::f16) {
2667  SDLoc DL(Op);
2668 
  // Extend to f32 first, then redo the same conversion from f32.
  // NOTE(review): the local is named FpToInt32 but the node is created with
  // result type i64.
2669  SDValue FPExtend = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
2670  SDValue FpToInt32 =
2671  DAG.getNode(Op.getOpcode(), DL, MVT::i64, FPExtend);
2672 
2673  return FpToInt32;
2674  }
2675 
  // The final argument (false) selects the unsigned variant of the helper.
2676  if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
2677  return LowerFP64_TO_INT(Op, DAG, false);
2678 
2679  return SDValue();
2680 }
2681 
2683  SelectionDAG &DAG) const {
  // Scalarizes a vector SIGN_EXTEND_INREG: each element is extracted, sign
  // extended in-register from the scalar form of the "extend from" type, and
  // the results are rebuilt into a vector of the original type.
2684  EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2685  MVT VT = Op.getSimpleValueType();
2686  MVT ScalarVT = VT.getScalarType();
2687 
2688  assert(VT.isVector());
2689 
2690  SDValue Src = Op.getOperand(0);
2691  SDLoc DL(Op);
2692 
2693  // TODO: Don't scalarize on Evergreen?
2694  unsigned NElts = VT.getVectorNumElements();
  // NOTE(review): the declaration of Args (original line 2695) is missing
  // from this listing.
2696  DAG.ExtractVectorElements(Src, Args, 0, NElts);
2697 
2698  SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
2699  for (unsigned I = 0; I < NElts; ++I)
2700  Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
2701 
2702  return DAG.getBuildVector(VT, DL, Args);
2703 }
2704 
2705 //===----------------------------------------------------------------------===//
2706 // Custom DAG optimizations
2707 //===----------------------------------------------------------------------===//
2708 
2709 static bool isU24(SDValue Op, SelectionDAG &DAG) {
2710  return AMDGPUTargetLowering::numBitsUnsigned(Op, DAG) <= 24;
2711 }
2712 
// Signed counterpart of isU24. Values whose type is narrower than 24 bits are
// rejected here so that they are handled as unsigned 24-bit values instead.
// NOTE(review): the second operand of the && (original line 2717) is missing
// from this listing.
2713 static bool isI24(SDValue Op, SelectionDAG &DAG) {
2714  EVT VT = Op.getValueType();
2715  return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated
2716  // as unsigned 24-bit values.
2718 }
2719 
// Tries to simplify the two operands of Node24 given that only their low 24
// bits are demanded. Returns a replacement node, Node24 itself when an operand
// was simplified in place, or an empty SDValue when nothing changed.
2720 static SDValue simplifyI24(SDNode *Node24,
2722  SelectionDAG &DAG = DCI.DAG;
2723  SDValue LHS = Node24->getOperand(0);
2724  SDValue RHS = Node24->getOperand(1);
2725 
  // Mask with only the low 24 bits set, sized to the operand width.
2726  APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24);
2727 
2728  // First try to simplify using GetDemandedBits which allows the operands to
2729  // have other uses, but will only perform simplifications that involve
2730  // bypassing some nodes for this user.
2731  SDValue DemandedLHS = DAG.GetDemandedBits(LHS, Demanded);
2732  SDValue DemandedRHS = DAG.GetDemandedBits(RHS, Demanded);
  // Rebuild the node with whichever operands were simplified, keeping the
  // original operand where no simplification was found.
2733  if (DemandedLHS || DemandedRHS)
2734  return DAG.getNode(Node24->getOpcode(), SDLoc(Node24), Node24->getVTList(),
2735  DemandedLHS ? DemandedLHS : LHS,
2736  DemandedRHS ? DemandedRHS : RHS);
2737 
2738  // Now try SimplifyDemandedBits which can simplify the nodes used by our
2739  // operands if this node is the only user.
2740  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2741  if (TLI.SimplifyDemandedBits(LHS, Demanded, DCI))
2742  return SDValue(Node24, 0);
2743  if (TLI.SimplifyDemandedBits(RHS, Demanded, DCI))
2744  return SDValue(Node24, 0);
2745 
2746  return SDValue();
2747 }
2748 
// Constant-folds a 32-bit bitfield extract of Width bits starting at bit
// Offset of Src0 and returns the result as an i32 constant node.
// IntTy controls the right shift in the first branch.
// NOTE(review): presumably a signed IntTy gives a sign-extending extract and
// an unsigned one a zero-extending extract - confirm against the callers.
2749 template <typename IntTy>
2751  uint32_t Width, const SDLoc &DL) {
2752  if (Width + Offset < 32) {
  // Shift the field up to the top of the 32-bit word to discard the bits
  // above it, then shift it back down to bit 0.
2753  uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width);
2754  IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);
2755  return DAG.getConstant(Result, DL, MVT::i32);
2756  }
2757 
  // The field reaches (or passes) bit 31: a plain right shift is enough.
2758  return DAG.getConstant(Src0 >> Offset, DL, MVT::i32);
2759 }
2760 
2761 static bool hasVolatileUser(SDNode *Val) {
2762  for (SDNode *U : Val->uses()) {
2763  if (MemSDNode *M = dyn_cast<MemSDNode>(U)) {
2764  if (M->isVolatile())
2765  return true;
2766  }
2767  }
2768 
2769  return false;
2770 }
2771 
// Decides whether a memory access of type VT should be rewritten by the
// load/store combines to use an equivalent memory type. Returns false for
// types that are left alone.
2773  // i32 vectors are the canonical memory type.
2774  if (VT.getScalarType() == MVT::i32 || isTypeLegal(VT))
2775  return false;
2776 
2777  if (!VT.isByteSized())
2778  return false;
2779 
2780  unsigned Size = VT.getStoreSize();
2781 
  // Non-vector types of 1, 2 or 4 bytes are left as they are.
2782  if ((Size == 1 || Size == 2 || Size == 4) && !VT.isVector())
2783  return false;
2784 
  // 3-byte types, and types larger than 4 bytes whose size is not a multiple
  // of 4 bytes, are also left as they are.
2785  if (Size == 3 || (Size > 4 && (Size % 4 != 0)))
2786  return false;
2787 
2788  return true;
2789 }
2790 
2791 // Replace load of an illegal type with a load of a friendlier type followed
2792 // by a bitcast back to the original type.
2794  DAGCombinerInfo &DCI) const {
  // Only runs before legalization, and never touches volatile loads,
  // non-normal loads, or loads that have a volatile memory user.
2795  if (!DCI.isBeforeLegalize())
2796  return SDValue();
2797 
2798  LoadSDNode *LN = cast<LoadSDNode>(N);
2799  if (LN->isVolatile() || !ISD::isNormalLoad(LN) || hasVolatileUser(LN))
2800  return SDValue();
2801 
2802  SDLoc SL(N);
2803  SelectionDAG &DAG = DCI.DAG;
2804  EVT VT = LN->getMemoryVT();
2805 
2806  unsigned Size = VT.getStoreSize();
2807  unsigned Align = LN->getAlignment();
2808  if (Align < Size && isTypeLegal(VT)) {
2809  bool IsFast;
2810  unsigned AS = LN->getAddressSpace();
2811 
2812  // Expand unaligned loads earlier than legalization. Due to visitation order
2813  // problems during legalization, the emitted instructions to pack and unpack
2814  // the bytes again are not eliminated in the case of an unaligned copy.
2815  if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) {
2816  if (VT.isVector())
2817  return scalarizeVectorLoad(LN, DAG);
2818 
  // Scalar case: return the expanded value and its chain as merged values.
2819  SDValue Ops[2];
2820  std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG);
2821  return DAG.getMergeValues(Ops, SDLoc(N));
2822  }
2823 
  // The misaligned access is allowed but not reported as fast: leave the
  // load untouched.
2824  if (!IsFast)
2825  return SDValue();
2826  }
2827 
2828  if (!shouldCombineMemoryType(VT))
2829  return SDValue();
2830 
2831  EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
2832 
  // Re-issue the load with the equivalent type, reusing the original chain,
  // base pointer and memory operand.
2833  SDValue NewLoad
2834  = DAG.getLoad(NewVT, SL, LN->getChain(),
2835  LN->getBasePtr(), LN->getMemOperand());
2836 
  // Replace both results of N: the value (via a bitcast back to VT) and the
  // chain (result 1 of the new load).
2837  SDValue BC = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad);
2838  DCI.CombineTo(N, BC, NewLoad.getValue(1));
2839  return SDValue(N, 0);
2840 }
2841 
2842 // Replace store of an illegal type with a store of a bitcast to a friendlier
2843 // type.
2845  DAGCombinerInfo &DCI) const {
  // Only runs before legalization, and never touches volatile or non-normal
  // stores.
2846  if (!DCI.isBeforeLegalize())
2847  return SDValue();
2848 
2849  StoreSDNode *SN = cast<StoreSDNode>(N);
2850  if (SN->isVolatile() || !ISD::isNormalStore(SN))
2851  return SDValue();
2852 
2853  EVT VT = SN->getMemoryVT();
2854  unsigned Size = VT.getStoreSize();
2855 
2856  SDLoc SL(N);
2857  SelectionDAG &DAG = DCI.DAG;
2858  unsigned Align = SN->getAlignment();
2859  if (Align < Size && isTypeLegal(VT)) {
2860  bool IsFast;
2861  unsigned AS = SN->getAddressSpace();
2862 
2863  // Expand unaligned stores earlier than legalization. Due to visitation
2864  // order problems during legalization, the emitted instructions to pack and
2865  // unpack the bytes again are not eliminated in the case of an unaligned
2866  // copy.
2867  if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) {
2868  if (VT.isVector())
2869  return scalarizeVectorStore(SN, DAG);
2870 
2871  return expandUnalignedStore(SN, DAG);
2872  }
2873 
  // The misaligned access is allowed but not reported as fast: leave the
  // store untouched.
2874  if (!IsFast)
2875  return SDValue();
2876  }
2877 
2878  if (!shouldCombineMemoryType(VT))
2879  return SDValue();
2880 
2881  EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
2882  SDValue Val = SN->getValue();
2883 
2884  //DCI.AddToWorklist(Val.getNode());
2885 
  // If the stored value has other users, redirect them to a bitcast back
  // from the new-typed value. OtherUses is sampled before CastVal is
  // created, so the new bitcast does not count as a use.
2886  bool OtherUses = !Val.hasOneUse();
2887  SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NewVT, Val);
2888  if (OtherUses) {
2889  SDValue CastBack = DAG.getNode(ISD::BITCAST, SL, VT, CastVal);
2890  DAG.ReplaceAllUsesOfValueWith(Val, CastBack);
2891  }
2892 
  // Store the bitcast value with the original chain, base pointer and memory
  // operand.
2893  return DAG.getStore(SN->getChain(), SL, CastVal,
2894  SN->getBasePtr(), SN->getMemOperand());
2895 }
2896 
2897 // FIXME: This should go in generic DAG combiner with an isTruncateFree check,
2898 // but isTruncateFree is inaccurate for i16 now because of SALU vs. VALU
2899 // issues.
2901  DAGCombinerInfo &DCI) const {
  // Moves an assert node below a truncate. The new node reuses N's own
  // opcode, so the rewrite applies to whichever opcode N carries, not only
  // the assertzext form shown in the pattern comment below.
2902  SelectionDAG &DAG = DCI.DAG;
2903  SDValue N0 = N->getOperand(0);
2904 
2905  // (vt2 (assertzext (truncate vt0:x), vt1)) ->
2906  // (vt2 (truncate (assertzext vt0:x, vt1)))
2907  if (N0.getOpcode() == ISD::TRUNCATE) {
2908  SDValue N1 = N->getOperand(1);
2909  EVT ExtVT = cast<VTSDNode>(N1)->getVT();
2910  SDLoc SL(N);
2911 
2912  SDValue Src = N0.getOperand(0);
2913  EVT SrcVT = Src.getValueType();
  // Only rewrite when the pre-truncate type is at least as wide as the
  // asserted type.
2914  if (SrcVT.bitsGE(ExtVT)) {
2915  SDValue NewInReg = DAG.getNode(N->getOpcode(), SL, SrcVT, Src, N1);
2916  return DAG.getNode(ISD::TRUNCATE, SL, N->getValueType(0), NewInReg);
2917  }
2918  }
2919 
2920  return SDValue();
2921 }
2922 /// Split the 64-bit value \p LHS into two 32-bit components, and perform the
2923 /// binary operation \p Opc to it with the corresponding constant operands.
/// \p ValLo is applied to the low half and \p ValHi to the high half; the two
/// 32-bit results are rebuilt into a v2i32 and bitcast back to i64.
2925  DAGCombinerInfo &DCI, const SDLoc &SL,
2926  unsigned Opc, SDValue LHS,
2927  uint32_t ValLo, uint32_t ValHi) const {
2928  SelectionDAG &DAG = DCI.DAG;
2929  SDValue Lo, Hi;
2930  std::tie(Lo, Hi) = split64BitValue(LHS, DAG);
2931 
2932  SDValue LoRHS = DAG.getConstant(ValLo, SL, MVT::i32);
2933  SDValue HiRHS = DAG.getConstant(ValHi, SL, MVT::i32);
2934 
  // Named "And" but built with the caller-supplied opcode Opc.
2935  SDValue LoAnd = DAG.getNode(Opc, SL, MVT::i32, Lo, LoRHS);
2936  SDValue HiAnd = DAG.getNode(Opc, SL, MVT::i32, Hi, HiRHS);
2937 
2938  // Re-visit the ands. It's possible we eliminated one of them and it could
2939  // simplify the vector.
  // NOTE(review): the nodes queued here are the split halves Lo/Hi, not the
  // newly created LoAnd/HiAnd that the comment above talks about - confirm
  // which was intended.
2940  DCI.AddToWorklist(Lo.getNode());
2941  DCI.AddToWorklist(Hi.getNode());
2942 
2943  SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {LoAnd, HiAnd});
2944  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
2945 }
2946 
2948  DAGCombinerInfo &DCI) const {
  // Combines for SHL by a constant amount: shift-of-extend rewrites, and
  // splitting an i64 shift by 32 or more into a 32-bit shift of the low half.
2949  EVT VT = N->getValueType(0);
2950 
  // NOTE(review): the definition of RHS (original line 2951) is missing from
  // this listing; it is used below as a pointer to a constant node.
2952  if (!RHS)
2953  return SDValue();
2954 
2955  SDValue LHS = N->getOperand(0);
2956  unsigned RHSVal = RHS->getZExtValue();
  // A shift by zero is just the shifted operand.
2957  if (!RHSVal)
2958  return LHS;
2959 
2960  SDLoc SL(N);
2961  SelectionDAG &DAG = DCI.DAG;
2962 
2963  switch (LHS->getOpcode()) {
2964  default:
2965  break;
2966  case ISD::ZERO_EXTEND:
2967  case ISD::SIGN_EXTEND:
2968  case ISD::ANY_EXTEND: {
2969  SDValue X = LHS->getOperand(0);
2970 
  // NOTE(review): the last clause of this condition (original line 2972)
  // is missing from this listing.
2971  if (VT == MVT::i32 && RHSVal == 16 && X.getValueType() == MVT::i16 &&
2973  // Prefer build_vector as the canonical form if packed types are legal.
2974  // (shl ([asz]ext i16:x), 16) -> build_vector 0, x
2975  SDValue Vec = DAG.getBuildVector(MVT::v2i16, SL,
2976  { DAG.getConstant(0, SL, MVT::i16), LHS->getOperand(0) });
2977  return DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec);
2978  }
2979 
2980  // shl (ext x) => zext (shl x), if shift does not overflow int
2981  if (VT != MVT::i64)
2982  break;
  // The shift is only narrowed when X is known to have at least RHSVal
  // leading zero bits, so no set bit can be shifted out of the narrow type.
2983  KnownBits Known = DAG.computeKnownBits(X);
2984  unsigned LZ = Known.countMinLeadingZeros();
2985  if (LZ < RHSVal)
2986  break;
2987  EVT XVT = X.getValueType();
2988  SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0));
2989  return DAG.getZExtOrTrunc(Shl, SL, VT);
2990  }
2991  }
2992 
2993  if (VT != MVT::i64)
2994  return SDValue();
2995 
2996  // i64 (shl x, C) -> (build_pair 0, (shl x, C - 32))
2997 
2998  // On some subtargets, 64-bit shift is a quarter rate instruction. In the
2999  // common case, splitting this into a move and a 32-bit shift is faster and
3000  // the same code size.
3001  if (RHSVal < 32)
3002  return SDValue();
3003 
3004  SDValue ShiftAmt = DAG.getConstant(RHSVal - 32, SL, MVT::i32);
3005 
  // Only the low 32 bits of the source can survive a left shift by >= 32.
3006  SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
3007  SDValue NewShift = DAG.getNode(ISD::SHL, SL, MVT::i32, Lo, ShiftAmt);
3008 
3009  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
3010 
  // Element 0 (low half) is zero; element 1 (high half) is the shifted value.
3011  SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {Zero, NewShift});
3012  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
3013 }
3014 
3016  DAGCombinerInfo &DCI) const {
  // Rewrites i64 arithmetic right shifts by exactly 32 or 63 as 32-bit
  // operations on the high half. Any other type or shift amount is left
  // untouched.
3017  if (N->getValueType(0) != MVT::i64)
3018  return SDValue();
3019 
3020  const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
3021  if (!RHS)
3022  return SDValue();
3023 
3024  SelectionDAG &DAG = DCI.DAG;
3025  SDLoc SL(N);
3026  unsigned RHSVal = RHS->getZExtValue();
3027 
3028  // (sra i64:x, 32) -> build_pair hi_32(x), (sra hi_32(x), 31)
3029  if (RHSVal == 32) {
3030  SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
3031  SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
3032  DAG.getConstant(31, SL, MVT::i32));
3033 
  // Low half of the result is the old high half; the new high half is
  // the sign of the old high half replicated across all 32 bits.
3034  SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {Hi, NewShift});
3035  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec);
3036  }
3037 
3038  // (sra i64:x, 63) -> build_pair (sra hi_32(x), 31), (sra hi_32(x), 31)
3039  if (RHSVal == 63) {
3040  SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
3041  SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
3042  DAG.getConstant(31, SL, MVT::i32));
3043  SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, NewShift});
3044  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec);
3045  }
3046 
3047  return SDValue();
3048 }
3049 
3051  DAGCombinerInfo &DCI) const {
  // Rewrites an i64 logical right shift by a constant >= 32 as a 32-bit shift
  // of the high half with a zero high result. Any other type or shift amount
  // is left untouched.
3052  if (N->getValueType(0) != MVT::i64)
3053  return SDValue();
3054 
3055  const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
3056  if (!RHS)
3057  return SDValue();
3058 
3059  unsigned ShiftAmt = RHS->getZExtValue();
3060  if (ShiftAmt < 32)
3061  return SDValue();
3062 
3063  // srl i64:x, C for C >= 32
3064  // =>
3065  // build_pair (srl hi_32(x), C - 32), 0
3066 
3067  SelectionDAG &DAG = DCI.DAG;
3068  SDLoc SL(N);
3069 
3070  SDValue One = DAG.getConstant(1, SL, MVT::i32);
3071  SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
3072 
3073  SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, N->getOperand(0));
  // NOTE(review): the first line of the Hi definition (original line 3074)
  // is missing from this listing; the surviving operands (VecOp, One)
  // suggest it extracts element 1 of VecOp - confirm against the full source.
3075  VecOp, One);
3076 
3077  SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32);
3078  SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst);
3079 
  // Element 0 (low half) is the shifted value; element 1 (high half) is zero.
3080  SDValue BuildPair = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, Zero});
3081 
3082  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildPair);
3083 }
3084 
3086  SDNode *N, DAGCombinerInfo &DCI) const {
3087  SDLoc SL(N);
3088  SelectionDAG &DAG = DCI.DAG;
3089  EVT VT = N->getValueType(0);
3090  SDValue Src = N->getOperand(0);
3091 
3092  // vt1 (truncate (bitcast (build_vector vt0:x, ...))) -> vt1 (bitcast vt0:x)
3093  if (Src.getOpcode() == ISD::BITCAST) {
3094  SDValue Vec = Src.getOperand(0);
3095  if (Vec.getOpcode() == ISD::BUILD_VECTOR) {
3096  SDValue Elt0 = Vec.getOperand(0);
3097  EVT EltVT = Elt0.getValueType();
3098  if (VT.getSizeInBits() <= EltVT.getSizeInBits()) {
3099  if (EltVT.isFloatingPoint()) {
3100  Elt0 = DAG.getNode(ISD::BITCAST, SL,
3101  EltVT.changeTypeToInteger(), Elt0);
3102  }
3103 
3104  return DAG.getNode(ISD::TRUNCATE, SL, VT, Elt0);
3105  }
3106  }
3107  }
3108 
3109  // Equivalent of above for accessing the high element of a vector as an
3110  // integer operation.
3111  // trunc (srl (bitcast (build_vector x, y))), 16 -> trunc (bitcast y)
3112  if (Src.getOpcode() == ISD::SRL && !VT.isVector()) {
3113  if (auto K = isConstOrConstSplat(Src.getOperand(1))) {
3114  if (2 * K->getZExtValue() == Src.getValueType().getScalarSizeInBits()) {
3115  SDValue BV = stripBitcast(Src.getOperand(0));
3116  if (BV.getOpcode() == ISD::BUILD_VECTOR &&
3117  BV.getValueType().getVectorNumElements() == 2) {
3118  SDValue SrcElt = BV.getOperand(1);
3119  EVT SrcEltVT = SrcElt.getValueType();
3120  if (SrcEltVT.isFloatingPoint()) {
3121  SrcElt = DAG.getNode(ISD::BITCAST, SL,
3122  SrcEltVT.changeTypeToInteger(), SrcElt);
3123  }
3124 
3125  return DAG.getNode(ISD::TRUNCATE, SL, VT, SrcElt);
3126  }
3127  }
3128  }
3129  }
3130 
3131  // Partially shrink 64-bit shifts to 32-bit if reduced to 16-bit.
3132  //
3133  // i16 (trunc (srl i64:x, K)), K <= 16 ->
3134  // i16 (trunc (srl (i32 (trunc x), K)))
3135  if (VT.getScalarSizeInBits() < 32) {
3136  EVT SrcVT = Src.getValueType();
3137  if (SrcVT.getScalarSizeInBits() > 32 &&
3138  (Src.getOpcode() == ISD::SRL ||
3139  Src.getOpcode() == ISD::SRA ||
3140  Src.getOpcode() == ISD::SHL)) {
3141  SDValue Amt = Src.getOperand(1);
3142  KnownBits Known = DAG.computeKnownBits(Amt);
3143  unsigned Size = VT.getScalarSizeInBits();
3144  if ((Known.isConstant() && Known.getConstant().ule(Size)) ||
3145  (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size))) {
3146  EVT MidVT = VT.isVector() ?
3149 
3150  EVT NewShiftVT = getShiftAmountTy(MidVT, DAG.getDataLayout());
3151  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MidVT,
3152  Src.getOperand(0));
3153  DCI.AddToWorklist(Trunc.getNode());
3154 
3155  if (Amt.getValueType() != NewShiftVT) {
3156  Amt = DAG.getZExtOrTrunc(Amt, SL, NewShiftVT);
3157  DCI.AddToWorklist(Amt.getNode());
3158  }
3159 
3160  SDValue ShrunkShift = DAG.getNode(Src.getOpcode(), SL, MidVT,
3161  Trunc, Amt);
3162  return DAG.getNode(ISD::TRUNCATE, SL, VT, ShrunkShift);
3163  }
3164  }
3165  }
3166 
3167  return SDValue();
3168 }
3169 
3170 // We need to specifically handle i64 mul here to avoid unnecessary conversion
3171 // instructions. If we only match on the legalized i64 mul expansion,
3172 // SimplifyDemandedBits will be unable to remove them because there will be
3173 // multiple uses due to the separate mul + mulh[su].
3174 static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL,
3175  SDValue N0, SDValue N1, unsigned Size, bool Signed) {
3176  if (Size <= 32) {
3177  unsigned MulOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
3178  return DAG.getNode(MulOpc, SL, MVT::i32, N0, N1);
3179  }
3180 
3181  // Because we want to eliminate extension instructions before the
3182  // operation, we need to create a single user here (i.e. not the separate
3183  // mul_lo + mul_hi) so that SimplifyDemandedBits will deal with it.
3184 
3185  unsigned MulOpc = Signed ? AMDGPUISD::MUL_LOHI_I24 : AMDGPUISD::MUL_LOHI_U24;
3186 
3187  SDValue Mul = DAG.getNode(MulOpc, SL,
3188  DAG.getVTList(MVT::i32, MVT::i32), N0, N1);
3189 
3190  return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64,
3191  Mul.getValue(0), Mul.getValue(1));
3192 }
3193 
3195  DAGCombinerInfo &DCI) const {
3196  EVT VT = N->getValueType(0);
3197 
3198  unsigned Size = VT.getSizeInBits();
3199  if (VT.isVector() || Size > 64)
3200  return SDValue();
3201 
3202  // There are i16 integer mul/mad.
3203  if (Subtarget->has16BitInsts() && VT.getScalarType().bitsLE(MVT::i16))
3204  return SDValue();
3205 
3206  SelectionDAG &DAG = DCI.DAG;
3207  SDLoc DL(N);
3208 
3209  SDValue N0 = N->getOperand(0);
3210  SDValue N1 = N->getOperand(1);
3211 
3212  // SimplifyDemandedBits has the annoying habit of turning useful zero_extends
3213  // in the source into any_extends if the result of the mul is truncated. Since
3214  // we can assume the high bits are whatever we want, use the underlying value
3215  // to avoid the unknown high bits from interfering.
3216  if (N0.getOpcode() == ISD::ANY_EXTEND)
3217  N0 = N0.getOperand(0);
3218 
3219  if (N1.getOpcode() == ISD::ANY_EXTEND)
3220  N1 = N1.getOperand(0);
3221 
3222  SDValue Mul;
3223 
3224  if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
3225  N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
3226  N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
3227  Mul = getMul24(DAG, DL, N0, N1, Size, false);
3228  } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
3229  N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
3230  N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
3231  Mul = getMul24(DAG, DL, N0, N1, Size, true);
3232  } else {
3233  return SDValue();
3234  }
3235 
3236  // We need to use sext even for MUL_U24, because MUL_U24 is used
3237  // for signed multiply of 8 and 16-bit types.
3238  return DAG.getSExtOrTrunc(Mul, DL, VT);
3239 }
3240 
3242  DAGCombinerInfo &DCI) const {
3243  EVT VT = N->getValueType(0);
3244 
3245  if (!Subtarget->hasMulI24() || VT.isVector())
3246  return SDValue();
3247 
3248  SelectionDAG &DAG = DCI.DAG;
3249  SDLoc DL(N);
3250 
3251  SDValue N0 = N->getOperand(0);
3252  SDValue N1 = N->getOperand(1);
3253 
3254  if (!isI24(N0, DAG) || !isI24(N1, DAG))
3255  return SDValue();
3256 
3257  N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
3258  N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
3259 
3260  SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_I24, DL, MVT::i32, N0, N1);
3261  DCI.AddToWorklist(Mulhi.getNode());
3262  return DAG.getSExtOrTrunc(Mulhi, DL, VT);
3263 }
3264 
3266  DAGCombinerInfo &DCI) const {
3267  EVT VT = N->getValueType(0);
3268 
3269  if (!Subtarget->hasMulU24() || VT.isVector() || VT.getSizeInBits() > 32)
3270  return SDValue();
3271 
3272  SelectionDAG &DAG = DCI.DAG;
3273  SDLoc DL(N);
3274 
3275  SDValue N0 = N->getOperand(0);
3276  SDValue N1 = N->getOperand(1);
3277 
3278  if (!isU24(N0, DAG) || !isU24(N1, DAG))
3279  return SDValue();
3280 
3281  N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
3282  N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
3283 
3284  SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_U24, DL, MVT::i32, N0, N1);
3285  DCI.AddToWorklist(Mulhi.getNode());
3286  return DAG.getZExtOrTrunc(Mulhi, DL, VT);
3287 }
3288 
3290  SDNode *N, DAGCombinerInfo &DCI) const {
3291  SelectionDAG &DAG = DCI.DAG;
3292 
3293  // Simplify demanded bits before splitting into multiple users.
3294  if (SDValue V = simplifyI24(N, DCI))
3295  return V;
3296 
3297  SDValue N0 = N->getOperand(0);
3298  SDValue N1 = N->getOperand(1);
3299 
3300  bool Signed = (N->getOpcode() == AMDGPUISD::MUL_LOHI_I24);
3301 
3302  unsigned MulLoOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
3303  unsigned MulHiOpc = Signed ? AMDGPUISD::MULHI_I24 : AMDGPUISD::MULHI_U24;
3304 
3305  SDLoc SL(N);
3306 
3307  SDValue MulLo = DAG.getNode(MulLoOpc, SL, MVT::i32, N0, N1);
3308  SDValue MulHi = DAG.getNode(MulHiOpc, SL, MVT::i32, N0, N1);
3309  return DAG.getMergeValues({ MulLo, MulHi }, SL);
3310 }
3311 
3312 static bool isNegativeOne(SDValue Val) {
3313  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val))
3314  return C->isAllOnesValue();
3315  return false;
3316 }
3317 
3318 SDValue AMDGPUTargetLowering::getFFBX_U32(SelectionDAG &DAG,
3319  SDValue Op,
3320  const SDLoc &DL,
3321  unsigned Opc) const {
3322  EVT VT = Op.getValueType();
3323  EVT LegalVT = getTypeToTransformTo(*DAG.getContext(), VT);
3324  if (LegalVT != MVT::i32 && (Subtarget->has16BitInsts() &&
3325  LegalVT != MVT::i16))
3326  return SDValue();
3327 
3328  if (VT != MVT::i32)
3329  Op = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Op);
3330 
3331  SDValue FFBX = DAG.getNode(Opc, DL, MVT::i32, Op);
3332  if (VT != MVT::i32)
3333  FFBX = DAG.getNode(ISD::TRUNCATE, DL, VT, FFBX);
3334 
3335  return FFBX;
3336 }
3337 
3338 // The native instructions return -1 on 0 input. Optimize out a select that
3339 // produces -1 on 0.
3340 //
3341 // TODO: If zero is not undef, we could also do this if the output is compared
3342 // against the bitwidth.
3343 //
3344 // TODO: Should probably combine against FFBH_U32 instead of ctlz directly.
3346  SDValue LHS, SDValue RHS,
3347  DAGCombinerInfo &DCI) const {
3348  ConstantSDNode *CmpRhs = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
3349  if (!CmpRhs || !CmpRhs->isNullValue())
3350  return SDValue();
3351 
3352  SelectionDAG &DAG = DCI.DAG;
3353  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
3354  SDValue CmpLHS = Cond.getOperand(0);
3355 
3356  unsigned Opc = isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 :
3358 
3359  // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x
3360  // select (setcc x, 0, eq), -1, (cttz_zero_undef x) -> ffbl_u32 x
3361  if (CCOpcode == ISD::SETEQ &&
3362  (isCtlzOpc(RHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) &&
3363  RHS.getOperand(0) == CmpLHS &&
3364  isNegativeOne(LHS)) {
3365  return getFFBX_U32(DAG, CmpLHS, SL, Opc);
3366  }
3367 
3368  // select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
3369  // select (setcc x, 0, ne), (cttz_zero_undef x), -1 -> ffbl_u32 x
3370  if (CCOpcode == ISD::SETNE &&
3371  (isCtlzOpc(LHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) &&
3372  LHS.getOperand(0) == CmpLHS &&
3373  isNegativeOne(RHS)) {
3374  return getFFBX_U32(DAG, CmpLHS, SL, Opc);
3375  }
3376 
3377  return SDValue();
3378 }
3379 
3381  unsigned Op,
3382  const SDLoc &SL,
3383  SDValue Cond,
3384  SDValue N1,
3385  SDValue N2) {
3386  SelectionDAG &DAG = DCI.DAG;
3387  EVT VT = N1.getValueType();
3388 
3389  SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT, Cond,
3390  N1.getOperand(0), N2.getOperand(0));
3391  DCI.AddToWorklist(NewSelect.getNode());
3392  return DAG.getNode(Op, SL, VT, NewSelect);
3393 }
3394 
3395 // Pull a free FP operation out of a select so it may fold into uses.
3396 //
3397 // select c, (fneg x), (fneg y) -> fneg (select c, x, y)
3398 // select c, (fneg x), k -> fneg (select c, x, (fneg k))
3399 //
3400 // select c, (fabs x), (fabs y) -> fabs (select c, x, y)
3401 // select c, (fabs x), +k -> fabs (select c, x, k)
3403  SDValue N) {
3404  SelectionDAG &DAG = DCI.DAG;
3405  SDValue Cond = N.getOperand(0);
3406  SDValue LHS = N.getOperand(1);
3407  SDValue RHS = N.getOperand(2);
3408 
3409  EVT VT = N.getValueType();
3410  if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) ||
3411  (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) {
3412  return distributeOpThroughSelect(DCI, LHS.getOpcode(),
3413  SDLoc(N), Cond, LHS, RHS);
3414  }
3415 
3416  bool Inv = false;
3417  if (RHS.getOpcode() == ISD::FABS || RHS.getOpcode() == ISD::FNEG) {
3418  std::swap(LHS, RHS);
3419  Inv = true;
3420  }
3421 
3422  // TODO: Support vector constants.
3424  if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS) {
3425  SDLoc SL(N);
3426  // If one side is an fneg/fabs and the other is a constant, we can push the
3427  // fneg/fabs down. If it's an fabs, the constant needs to be non-negative.
3428  SDValue NewLHS = LHS.getOperand(0);
3429  SDValue NewRHS = RHS;
3430 
3431  // Careful: if the neg can be folded up, don't try to pull it back down.
3432  bool ShouldFoldNeg = true;
3433 
3434  if (NewLHS.hasOneUse()) {
3435  unsigned Opc = NewLHS.getOpcode();
3436  if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(Opc))
3437  ShouldFoldNeg = false;
3438  if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL)
3439  ShouldFoldNeg = false;
3440  }
3441 
3442  if (ShouldFoldNeg) {
3443  if (LHS.getOpcode() == ISD::FNEG)
3444  NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
3445  else if (CRHS->isNegative())
3446  return SDValue();
3447 
3448  if (Inv)
3449  std::swap(NewLHS, NewRHS);
3450 
3451  SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
3452  Cond, NewLHS, NewRHS);
3453  DCI.AddToWorklist(NewSelect.getNode());
3454  return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
3455  }
3456  }
3457 
3458  return SDValue();
3459 }
3460 
3461 
3463  DAGCombinerInfo &DCI) const {
3464  if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0)))
3465  return Folded;
3466 
3467  SDValue Cond = N->getOperand(0);
3468  if (Cond.getOpcode() != ISD::SETCC)
3469  return SDValue();
3470 
3471  EVT VT = N->getValueType(0);
3472  SDValue LHS = Cond.getOperand(0);
3473  SDValue RHS = Cond.getOperand(1);
3474  SDValue CC = Cond.getOperand(2);
3475 
3476  SDValue True = N->getOperand(1);
3477  SDValue False = N->getOperand(2);
3478 
3479  if (Cond.hasOneUse()) { // TODO: Look for multiple select uses.
3480  SelectionDAG &DAG = DCI.DAG;
3481  if ((DAG.isConstantValueOfAnyType(True) ||
3482  DAG.isConstantValueOfAnyType(True)) &&
3483  (!DAG.isConstantValueOfAnyType(False) &&
3484  !DAG.isConstantValueOfAnyType(False))) {
3485  // Swap cmp + select pair to move constant to false input.
3486  // This will allow using VOPC cndmasks more often.
3487  // select (setcc x, y), k, x -> select (setcc y, x) x, x
3488 
3489  SDLoc SL(N);
3490  ISD::CondCode NewCC = getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
3491  LHS.getValueType().isInteger());
3492 
3493  SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC);
3494  return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True);
3495  }
3496 
3497  if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) {
3498  SDValue MinMax
3499  = combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
3500  // Revisit this node so we can catch min3/max3/med3 patterns.
3501  //DCI.AddToWorklist(MinMax.getNode());
3502  return MinMax;
3503  }
3504  }
3505 
3506  // There's no reason to not do this if the condition has other uses.
3507  return performCtlz_CttzCombine(SDLoc(N), Cond, True, False, DCI);
3508 }
3509 
3510 static bool isInv2Pi(const APFloat &APF) {
3511  static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
3512  static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
3513  static const APFloat KF64(APFloat::IEEEdouble(), APInt(64, 0x3fc45f306dc9c882));
3514 
3515  return APF.bitwiseIsEqual(KF16) ||
3516  APF.bitwiseIsEqual(KF32) ||
3517  APF.bitwiseIsEqual(KF64);
3518 }
3519 
3520 // 0 and 1.0 / (0.5 * pi) do not have inline immmediates, so there is an
3521 // additional cost to negate them.
3523  if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N)) {
3524  if (C->isZero() && !C->isNegative())
3525  return true;
3526 
3527  if (Subtarget->hasInv2PiInlineImm() && isInv2Pi(C->getValueAPF()))
3528  return true;
3529  }
3530 
3531  return false;
3532 }
3533 
3534 static unsigned inverseMinMax(unsigned Opc) {
3535  switch (Opc) {
3536  case ISD::FMAXNUM:
3537  return ISD::FMINNUM;
3538  case ISD::FMINNUM:
3539  return ISD::FMAXNUM;
3540  case ISD::FMAXNUM_IEEE:
3541  return ISD::FMINNUM_IEEE;
3542  case ISD::FMINNUM_IEEE:
3543  return ISD::FMAXNUM_IEEE;
3545  return AMDGPUISD::FMIN_LEGACY;
3547  return AMDGPUISD::FMAX_LEGACY;
3548  default:
3549  llvm_unreachable("invalid min/max opcode");
3550  }
3551 }
3552 
3554  DAGCombinerInfo &DCI) const {
3555  SelectionDAG &DAG = DCI.DAG;
3556  SDValue N0 = N->getOperand(0);
3557  EVT VT = N->getValueType(0);
3558 
3559  unsigned Opc = N0.getOpcode();
3560 
3561  // If the input has multiple uses and we can either fold the negate down, or
3562  // the other uses cannot, give up. This both prevents unprofitable
3563  // transformations and infinite loops: we won't repeatedly try to fold around
3564  // a negate that has no 'good' form.
3565  if (N0.hasOneUse()) {
3566  // This may be able to fold into the source, but at a code size cost. Don't
3567  // fold if the fold into the user is free.
3568  if (allUsesHaveSourceMods(N, 0))
3569  return SDValue();
3570  } else {
3571  if (fnegFoldsIntoOp(Opc) &&
3573  return SDValue();
3574  }
3575 
3576  SDLoc SL(N);
3577  switch (Opc) {
3578  case ISD::FADD: {
3579  if (!mayIgnoreSignedZero(N0))
3580  return SDValue();
3581 
3582  // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y))
3583  SDValue LHS = N0.getOperand(0);
3584  SDValue RHS = N0.getOperand(1);
3585 
3586  if (LHS.getOpcode() != ISD::FNEG)
3587  LHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
3588  else
3589  LHS = LHS.getOperand(0);
3590 
3591  if (RHS.getOpcode() != ISD::FNEG)
3592  RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
3593  else
3594  RHS = RHS.getOperand(0);
3595 
3596  SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS, N0->getFlags());
3597  if (!N0.hasOneUse())
3598  DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
3599  return Res;
3600  }
3601  case ISD::FMUL:
3602  case AMDGPUISD::FMUL_LEGACY: {
3603  // (fneg (fmul x, y)) -> (fmul x, (fneg y))
3604  // (fneg (fmul_legacy x, y)) -> (fmul_legacy x, (fneg y))
3605  SDValue LHS = N0.getOperand(0);
3606  SDValue RHS = N0.getOperand(1);
3607 
3608  if (LHS.getOpcode() == ISD::FNEG)
3609  LHS = LHS.getOperand(0);
3610  else if (RHS.getOpcode() == ISD::FNEG)
3611  RHS = RHS.getOperand(0);
3612  else
3613  RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
3614 
3615  SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS, N0->getFlags());
3616  if (!N0.hasOneUse())
3617  DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
3618  return Res;
3619  }
3620  case ISD::FMA:
3621  case ISD::FMAD: {
3622  if (!mayIgnoreSignedZero(N0))
3623  return SDValue();
3624 
3625  // (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z))
3626  SDValue LHS = N0.getOperand(0);
3627  SDValue MHS = N0.getOperand(1);
3628  SDValue RHS = N0.getOperand(2);
3629 
3630  if (LHS.getOpcode() == ISD::FNEG)
3631  LHS = LHS.getOperand(0);
3632  else if (MHS.getOpcode() == ISD::FNEG)
3633  MHS = MHS.getOperand(0);
3634  else
3635  MHS = DAG.getNode(ISD::FNEG, SL, VT, MHS);
3636 
3637  if (RHS.getOpcode() != ISD::FNEG)
3638  RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
3639  else
3640  RHS = RHS.getOperand(0);
3641 
3642  SDValue Res = DAG.getNode(Opc, SL, VT, LHS, MHS, RHS);
3643  if (!N0.hasOneUse())
3644  DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
3645  return Res;
3646  }
3647  case ISD::FMAXNUM:
3648  case ISD::FMINNUM:
3649  case ISD::FMAXNUM_IEEE:
3650  case ISD::FMINNUM_IEEE:
3652  case AMDGPUISD::FMIN_LEGACY: {
3653  // fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
3654  // fneg (fminnum x, y) -> fmaxnum (fneg x), (fneg y)
3655  // fneg (fmax_legacy x, y) -> fmin_legacy (fneg x), (fneg y)
3656  // fneg (fmin_legacy x, y) -> fmax_legacy (fneg x), (fneg y)
3657 
3658  SDValue LHS = N0.getOperand(0);
3659  SDValue RHS = N0.getOperand(1);
3660 
3661  // 0 doesn't have a negated inline immediate.
3662  // TODO: This constant check should be generalized to other operations.
3663  if (isConstantCostlierToNegate(RHS))
3664  return SDValue();
3665 
3666  SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
3667  SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
3668  unsigned Opposite = inverseMinMax(Opc);
3669 
3670  SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags());
3671  if (!N0.hasOneUse())
3672  DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
3673  return Res;
3674  }
3675  case AMDGPUISD::FMED3: {
3676  SDValue Ops[3];
3677  for (unsigned I = 0; I < 3; ++I)
3678  Ops[I] = DAG.getNode(ISD::FNEG, SL, VT, N0->getOperand(I), N0->getFlags());
3679 
3680  SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags());
3681  if (!N0.hasOneUse())
3682  DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
3683  return Res;
3684  }
3685  case ISD::FP_EXTEND:
3686  case ISD::FTRUNC:
3687  case ISD::FRINT:
3688  case ISD::FNEARBYINT: // XXX - Should fround be handled?
3689  case ISD::FSIN:
3690  case ISD::FCANONICALIZE:
3691  case AMDGPUISD::RCP:
3692  case AMDGPUISD::RCP_LEGACY:
3693  case AMDGPUISD::RCP_IFLAG:
3694  case AMDGPUISD::SIN_HW: {
3695  SDValue CvtSrc = N0.getOperand(0);
3696  if (CvtSrc.getOpcode() == ISD::FNEG) {
3697  // (fneg (fp_extend (fneg x))) -> (fp_extend x)
3698  // (fneg (rcp (fneg x))) -> (rcp x)
3699  return DAG.getNode(Opc, SL, VT, CvtSrc.getOperand(0));
3700  }
3701 
3702  if (!N0.hasOneUse())
3703  return SDValue();
3704 
3705  // (fneg (fp_extend x)) -> (fp_extend (fneg x))
3706  // (fneg (rcp x)) -> (rcp (fneg x))
3707  SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
3708  return DAG.getNode(Opc, SL, VT, Neg, N0->getFlags());
3709  }
3710  case ISD::FP_ROUND: {
3711  SDValue CvtSrc = N0.getOperand(0);
3712 
3713  if (CvtSrc.getOpcode() == ISD::FNEG) {
3714  // (fneg (fp_round (fneg x))) -> (fp_round x)
3715  return DAG.getNode(ISD::FP_ROUND, SL, VT,
3716  CvtSrc.getOperand(0), N0.getOperand(1));
3717  }
3718 
3719  if (!N0.hasOneUse())
3720  return SDValue();
3721 
3722  // (fneg (fp_round x)) -> (fp_round (fneg x))
3723  SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
3724  return DAG.getNode(ISD::FP_ROUND, SL, VT, Neg, N0.getOperand(1));
3725  }
3726  case ISD::FP16_TO_FP: {
3727  // v_cvt_f32_f16 supports source modifiers on pre-VI targets without legal
3728  // f16, but legalization of f16 fneg ends up pulling it out of the source.
3729  // Put the fneg back as a legal source operation that can be matched later.
3730  SDLoc SL(N);
3731 
3732  SDValue Src = N0.getOperand(0);
3733  EVT SrcVT = Src.getValueType();
3734 
3735  // fneg (fp16_to_fp x) -> fp16_to_fp (xor x, 0x8000)
3736  SDValue IntFNeg = DAG.getNode(ISD::XOR, SL, SrcVT, Src,
3737  DAG.getConstant(0x8000, SL, SrcVT));
3738  return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg);
3739  }
3740  default:
3741  return SDValue();
3742  }
3743 }
3744 
3746  DAGCombinerInfo &DCI) const {
3747  SelectionDAG &DAG = DCI.DAG;
3748  SDValue N0 = N->getOperand(0);
3749 
3750  if (!N0.hasOneUse())
3751  return SDValue();
3752 
3753  switch (N0.getOpcode()) {
3754  case ISD::FP16_TO_FP: {
3755  assert(!Subtarget->has16BitInsts() && "should only see if f16 is illegal");
3756  SDLoc SL(N);
3757  SDValue Src = N0.getOperand(0);
3758  EVT SrcVT = Src.getValueType();
3759 
3760  // fabs (fp16_to_fp x) -> fp16_to_fp (and x, 0x7fff)
3761  SDValue IntFAbs = DAG.getNode(ISD::AND, SL, SrcVT, Src,
3762  DAG.getConstant(0x7fff, SL, SrcVT));
3763  return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFAbs);
3764  }
3765  default:
3766  return SDValue();
3767  }
3768 }
3769 
3771  DAGCombinerInfo &DCI) const {
3772  const auto *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
3773  if (!CFP)
3774  return SDValue();
3775 
3776  // XXX - Should this flush denormals?
3777  const APFloat &Val = CFP->getValueAPF();
3778  APFloat One(Val.getSemantics(), "1.0");
3779  return DCI.DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0));
3780 }
3781 
3783  DAGCombinerInfo &DCI) const {
3784  SelectionDAG &DAG = DCI.DAG;
3785  SDLoc DL(N);
3786 
3787  switch(N->getOpcode()) {
3788  default:
3789  break;
3790  case ISD::BITCAST: {
3791  EVT DestVT = N->getValueType(0);
3792 
3793  // Push casts through vector builds. This helps avoid emitting a large
3794  // number of copies when materializing floating point vector constants.
3795  //
3796  // vNt1 bitcast (vNt0 (build_vector t0:x, t0:y)) =>
3797  // vnt1 = build_vector (t1 (bitcast t0:x)), (t1 (bitcast t0:y))
3798  if (DestVT.isVector()) {
3799  SDValue Src = N->getOperand(0);
3800  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
3801  EVT SrcVT = Src.getValueType();
3802  unsigned NElts = DestVT.getVectorNumElements();
3803 
3804  if (SrcVT.getVectorNumElements() == NElts) {
3805  EVT DestEltVT = DestVT.getVectorElementType();
3806 
3807  SmallVector<SDValue, 8> CastedElts;
3808  SDLoc SL(N);
3809  for (unsigned I = 0, E = SrcVT.getVectorNumElements(); I != E; ++I) {
3810  SDValue Elt = Src.getOperand(I);
3811  CastedElts.push_back(DAG.getNode(ISD::BITCAST, DL, DestEltVT, Elt));
3812  }
3813 
3814  return DAG.getBuildVector(DestVT, SL, CastedElts);
3815  }
3816  }
3817  }
3818 
3819  if (DestVT.getSizeInBits() != 64 && !DestVT.isVector())
3820  break;
3821 
3822  // Fold bitcasts of constants.
3823  //
3824  // v2i32 (bitcast i64:k) -> build_vector lo_32(k), hi_32(k)
3825  // TODO: Generalize and move to DAGCombiner
3826  SDValue Src = N->getOperand(0);
3827  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src)) {
3828  if (Src.getValueType() == MVT::i64) {
3829  SDLoc SL(N);
3830  uint64_t CVal = C->getZExtValue();
3832  DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
3833  DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
3834  return DAG.getNode(ISD::BITCAST, SL, DestVT, BV);
3835  }
3836  }
3837 
3838  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Src)) {
3839  const APInt &Val = C->getValueAPF().bitcastToAPInt();
3840  SDLoc SL(N);
3841  uint64_t CVal = Val.getZExtValue();
3843  DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
3844  DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
3845 
3846  return DAG.getNode(ISD::BITCAST, SL, DestVT, Vec);
3847  }
3848 
3849  break;
3850  }
3851  case ISD::SHL: {
3853  break;
3854 
3855  return performShlCombine(N, DCI);
3856  }
3857  case ISD::SRL: {
3859  break;
3860 
3861  return performSrlCombine(N, DCI);
3862  }
3863  case ISD::SRA: {
3865  break;
3866 
3867  return performSraCombine(N, DCI);
3868  }
3869  case ISD::TRUNCATE:
3870  return performTruncateCombine(N, DCI);
3871  case ISD::MUL:
3872  return performMulCombine(N, DCI);
3873  case ISD::MULHS:
3874  return performMulhsCombine(N, DCI);
3875  case ISD::MULHU:
3876  return performMulhuCombine(N, DCI);
3877  case AMDGPUISD::MUL_I24:
3878  case AMDGPUISD::MUL_U24:
3879  case AMDGPUISD::MULHI_I24:
3880  case AMDGPUISD::MULHI_U24: {
3881  if (SDValue V = simplifyI24(N, DCI))
3882  return V;
3883  return SDValue();
3884  }
3887  return performMulLoHi24Combine(N, DCI);
3888  case ISD::SELECT:
3889  return performSelectCombine(N, DCI);
3890  case ISD::FNEG:
3891  return performFNegCombine(N, DCI);
3892  case ISD::FABS:
3893  return performFAbsCombine(N, DCI);
3894  case AMDGPUISD::BFE_I32:
3895  case AMDGPUISD::BFE_U32: {
3896  assert(!N->getValueType(0).isVector() &&
3897  "Vector handling of BFE not implemented");
3899  if (!Width)
3900  break;
3901 
3902  uint32_t WidthVal = Width->getZExtValue() & 0x1f;
3903  if (WidthVal == 0)
3904  return DAG.getConstant(0, DL, MVT::i32);
3905 
3907  if (!Offset)
3908  break;
3909 
3910  SDValue BitsFrom = N->getOperand(0);
3911  uint32_t OffsetVal = Offset->getZExtValue() & 0x1f;
3912 
3913  bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32;
3914 
3915  if (OffsetVal == 0) {
3916  // This is already sign / zero extended, so try to fold away extra BFEs.
3917  unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal);
3918 
3919  unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom);
3920  if (OpSignBits >= SignBits)
3921  return BitsFrom;
3922 
3923  EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal);
3924  if (Signed) {
3925  // This is a sign_extend_inreg. Replace it to take advantage of existing
3926  // DAG Combines. If not eliminated, we will match back to BFE during
3927  // selection.
3928 
3929  // TODO: The sext_inreg of extended types ends, although we can could
3930  // handle them in a single BFE.
3931  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom,
3932  DAG.getValueType(SmallVT));
3933  }
3934 
3935  return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT);
3936  }
3937 
3938  if (ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(BitsFrom)) {
3939  if (Signed) {
3940  return constantFoldBFE<int32_t>(DAG,
3941  CVal->getSExtValue(),
3942  OffsetVal,
3943  WidthVal,
3944  DL);
3945  }
3946 
3947  return constantFoldBFE<uint32_t>(DAG,
3948  CVal->getZExtValue(),
3949  OffsetVal,
3950  WidthVal,
3951  DL);
3952  }
3953 
3954  if ((OffsetVal + WidthVal) >= 32 &&
3955  !(Subtarget->hasSDWA() && OffsetVal == 16 && WidthVal == 16)) {
3956  SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32);
3957  return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32,
3958  BitsFrom, ShiftVal);
3959  }
3960 
3961  if (BitsFrom.hasOneUse()) {
3962  APInt Demanded = APInt::getBitsSet(32,
3963  OffsetVal,
3964  OffsetVal + WidthVal);
3965 
3966  KnownBits Known;
3968  !DCI.isBeforeLegalizeOps());
3969  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3970  if (TLI.ShrinkDemandedConstant(BitsFrom, Demanded, TLO) ||
3971  TLI.SimplifyDemandedBits(BitsFrom, Demanded, Known, TLO)) {
3972  DCI.CommitTargetLoweringOpt(TLO);
3973  }
3974  }
3975 
3976  break;
3977  }
3978  case ISD::LOAD:
3979  return performLoadCombine(N, DCI);
3980  case ISD::STORE:
3981  return performStoreCombine(N, DCI);
3982  case AMDGPUISD::RCP:
3983  case AMDGPUISD::RCP_IFLAG:
3984  return performRcpCombine(N, DCI);
3985  case ISD::AssertZext:
3986  case ISD::AssertSext:
3987  return performAssertSZExtCombine(N, DCI);
3988  }
3989  return SDValue();
3990 }
3991 
3992 //===----------------------------------------------------------------------===//
3993 // Helper functions
3994 //===----------------------------------------------------------------------===//
3995 
3997  const TargetRegisterClass *RC,
3998  unsigned Reg, EVT VT,
3999  const SDLoc &SL,
4000  bool RawReg) const {
4001  MachineFunction &MF = DAG.getMachineFunction();
4003  unsigned VReg;
4004 
4005  if (!MRI.isLiveIn(Reg)) {
4006  VReg = MRI.createVirtualRegister(RC);
4007  MRI.addLiveIn(Reg, VReg);
4008  } else {
4009  VReg = MRI.getLiveInVirtReg(Reg);
4010  }
4011 
4012  if (RawReg)
4013  return DAG.getRegister(VReg, VT);
4014 
4015  return DAG.getCopyFromReg(DAG.getEntryNode(), SL, VReg, VT);
4016 }
4017 
4019  EVT VT,
4020  const SDLoc &SL,
4021  int64_t Offset) const {
4022  MachineFunction &MF = DAG.getMachineFunction();
4023  MachineFrameInfo &MFI = MF.getFrameInfo();
4024 
4025  int FI = MFI.CreateFixedObject(VT.getStoreSize(), Offset, true);
4026  auto SrcPtrInfo = MachinePointerInfo::getStack(MF, Offset);
4027  SDValue Ptr = DAG.getFrameIndex(FI, MVT::i32);
4028 
4029  return DAG.getLoad(VT, SL, DAG.getEntryNode(), Ptr, SrcPtrInfo, 4,
4032 }
4033 
4035  const SDLoc &SL,
4036  SDValue Chain,
4037  SDValue ArgVal,
4038  int64_t Offset) const {
4039  MachineFunction &MF = DAG.getMachineFunction();
4040  MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset);
4041 
4042  SDValue Ptr = DAG.getConstant(Offset, SL, MVT::i32);
4043  SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, 4,
4045  return Store;
4046 }
4047 
4049  const TargetRegisterClass *RC,
4050  EVT VT, const SDLoc &SL,
4051  const ArgDescriptor &Arg) const {
4052  assert(Arg && "Attempting to load missing argument");
4053 
4054  if (Arg.isRegister())
4055  return CreateLiveInRegister(DAG, RC, Arg.getRegister(), VT, SL);
4056  return loadStackInputValue(DAG, VT, SL, Arg.getStackOffset());
4057 }
4058 
4060  const MachineFunction &MF, const ImplicitParameter Param) const {
4062  const AMDGPUSubtarget &ST =
4064  unsigned ExplicitArgOffset = ST.getExplicitKernelArgOffset(MF.getFunction());
4065  unsigned Alignment = ST.getAlignmentForImplicitArgPtr();
4066  uint64_t ArgOffset = alignTo(MFI->getExplicitKernArgSize(), Alignment) +
4067  ExplicitArgOffset;
4068  switch (Param) {
4069  case GRID_DIM:
4070  return ArgOffset;
4071  case GRID_OFFSET:
4072  return ArgOffset + 4;
4073  }
4074  llvm_unreachable("unexpected implicit parameter type");
4075 }
4076 
4077 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
4078 
4079 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
4080  switch ((AMDGPUISD::NodeType)Opcode) {
4081  case AMDGPUISD::FIRST_NUMBER: break;
4082  // AMDIL DAG nodes
4085 
4086  // AMDGPU DAG nodes
4220 
4222  }
4223  return nullptr;
4224 }
4225 
4227  SelectionDAG &DAG, int Enabled,
4228  int &RefinementSteps,
4229  bool &UseOneConstNR,
4230  bool Reciprocal) const {
4231  EVT VT = Operand.getValueType();
4232 
4233  if (VT == MVT::f32) {
4234  RefinementSteps = 0;
4235  return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand);
4236  }
4237 
4238  // TODO: There is also an f64 rsq instruction, but the documentation is less
4239  // clear on its precision.
4240 
4241  return SDValue();
4242 }
4243 
4245  SelectionDAG &DAG, int Enabled,
4246  int &RefinementSteps) const {
4247  EVT VT = Operand.getValueType();
4248 
4249  if (VT == MVT::f32) {
4250  // Reciprocal, < 1 ulp error.
4251  //
4252  // This reciprocal approximation converges to < 0.5 ulp error with one
4253  // Newton-Raphson iteration performed with two fused multiply-adds (FMAs).
4254 
4255  RefinementSteps = 0;
4256  return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);
4257  }
4258 
4259  // TODO: There is also an f64 rcp instruction, but the documentation is less
4260  // clear on its precision.
4261 
4262  return SDValue();
4263 }
4264 
4266  const SDValue Op, KnownBits &Known,
4267  const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
4268 
4269  Known.resetAll(); // Don't know anything.
4270 
4271  unsigned Opc = Op.getOpcode();
4272 
4273  switch (Opc) {
4274  default:
4275  break;
4276  case AMDGPUISD::CARRY:
4277  case AMDGPUISD::BORROW: {
4278  Known.Zero = APInt::getHighBitsSet(32, 31);
4279  break;
4280  }
4281 
4282  case AMDGPUISD::BFE_I32:
4283  case AMDGPUISD::BFE_U32: {
4285  if (!CWidth)
4286  return;
4287 
4288  uint32_t Width = CWidth->getZExtValue() & 0x1f;
4289 
4290  if (Opc == AMDGPUISD::BFE_U32)
4291  Known.Zero = APInt::getHighBitsSet(32, 32 - Width);
4292 
4293  break;
4294  }
4295  case AMDGPUISD::FP_TO_FP16:
4296  case AMDGPUISD::FP16_ZEXT: {
4297  unsigned BitWidth = Known.getBitWidth();
4298 
4299  // High bits are zero.
4300  Known.Zero = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
4301  break;
4302  }
4303  case AMDGPUISD::MUL_U24:
4304  case AMDGPUISD::MUL_I24: {
4305  KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
4306  KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
4307  unsigned TrailZ = LHSKnown.countMinTrailingZeros() +
4308  RHSKnown.countMinTrailingZeros();
4309  Known.Zero.setLowBits(std::min(TrailZ, 32u));
4310 
4311  // Truncate to 24 bits.
4312  LHSKnown = LHSKnown.trunc(24);
4313  RHSKnown = RHSKnown.trunc(24);
4314 
4315  bool Negative = false;
4316  if (Opc == AMDGPUISD::MUL_I24) {
4317  unsigned LHSValBits = 24 - LHSKnown.countMinSignBits();
4318  unsigned RHSValBits = 24 - RHSKnown.countMinSignBits();
4319  unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
4320  if (MaxValBits >= 32)
4321  break;
4322  bool LHSNegative = LHSKnown.isNegative();
4323  bool LHSPositive = LHSKnown.isNonNegative();
4324  bool RHSNegative = RHSKnown.isNegative();
4325  bool RHSPositive = RHSKnown.isNonNegative();
4326  if ((!LHSNegative && !LHSPositive) || (!RHSNegative && !RHSPositive))
4327  break;
4328  Negative = (LHSNegative && RHSPositive) || (LHSPositive && RHSNegative);
4329  if (Negative)
4330  Known.One.setHighBits(32 - MaxValBits);
4331  else
4332  Known.Zero.setHighBits(32 - MaxValBits);
4333  } else {
4334  unsigned LHSValBits = 24 - LHSKnown.countMinLeadingZeros();
4335  unsigned RHSValBits = 24 - RHSKnown.countMinLeadingZeros();
4336  unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
4337  if (MaxValBits >= 32)
4338  break;
4339  Known.Zero.setHighBits(32 - MaxValBits);
4340  }
4341  break;
4342  }
4343  case AMDGPUISD::PERM: {
4345  if (!CMask)
4346  return;
4347 
4348  KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
4349  KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
4350  unsigned Sel = CMask->getZExtValue();
4351 
4352  for (unsigned I = 0; I < 32; I += 8) {
4353  unsigned SelBits = Sel & 0xff;
4354  if (SelBits < 4) {
4355  SelBits *= 8;
4356  Known.One |= ((RHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I;
4357  Known.Zero |= ((RHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I;
4358  } else if (SelBits < 7) {
4359  SelBits = (SelBits & 3) * 8;
4360  Known.One |= ((LHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I;
4361  Known.Zero |= ((LHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I;
4362  } else if (SelBits == 0x0c) {
4363  Known.Zero |= 0xff << I;
4364  } else if (SelBits > 0x0c) {
4365  Known.One |= 0xff << I;
4366  }
4367  Sel >>= 8;
4368  }
4369  break;
4370  }
4371  case ISD::INTRINSIC_WO_CHAIN: {
4372  unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4373  switch (IID) {
4376  const GCNSubtarget &ST =
4378  // These return at most the wavefront size - 1.
4379  unsigned Size = Op.getValueType().getSizeInBits();
4380  Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2());
4381  break;
4382  }
4383  default:
4384  break;
4385  }
4386  }
4387  }
4388 }
4389 
4391  SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4392  unsigned Depth) const {
4393  switch (Op.getOpcode()) {
4394  case AMDGPUISD::BFE_I32: {
4396  if (!Width)
4397  return 1;
4398 
4399  unsigned SignBits = 32 - Width->getZExtValue() + 1;
4400  if (!isNullConstant(Op.getOperand(1)))
4401  return SignBits;
4402 
4403  // TODO: Could probably figure something out with non-0 offsets.
4404  unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
4405  return std::max(SignBits, Op0SignBits);
4406  }
4407 
4408  case AMDGPUISD::BFE_U32: {
4410  return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1;
4411  }
4412 
4413  case AMDGPUISD::CARRY:
4414  case AMDGPUISD::BORROW:
4415  return 31;
4416  case AMDGPUISD::FP_TO_FP16:
4417  case AMDGPUISD::FP16_ZEXT:
4418  return 16;
4419  default:
4420  return 1;
4421  }
4422 }
4423 
4425  const SelectionDAG &DAG,
4426  bool SNaN,
4427  unsigned Depth) const {
4428  unsigned Opcode = Op.getOpcode();
4429  switch (Opcode) {
4431  case AMDGPUISD::FMAX_LEGACY: {
4432  if (SNaN)
4433  return true;
4434 
4435  // TODO: Can check no nans on one of the operands for each one, but which
4436  // one?
4437  return false;
4438  }
4441  if (SNaN)
4442  return true;
4443  return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
4444  DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
4445  }
4446  case AMDGPUISD::FMED3:
4447  case AMDGPUISD::FMIN3:
4448  case AMDGPUISD::FMAX3:
4449  case AMDGPUISD::FMAD_FTZ: {
4450  if (SNaN)
4451  return true;
4452  return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
4453  DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
4454  DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
4455  }
4460  return true;
4461 
4462  case AMDGPUISD::RCP:
4463  case AMDGPUISD::RSQ:
4464  case AMDGPUISD::RCP_LEGACY:
4465  case AMDGPUISD::RSQ_LEGACY:
4466  case AMDGPUISD::RSQ_CLAMP: {
4467  if (SNaN)
4468  return true;
4469 
4470  // TODO: Need is known positive check.
4471  return false;
4472  }
4473  case AMDGPUISD::LDEXP:
4474  case AMDGPUISD::FRACT: {
4475  if (SNaN)
4476  return true;
4477  return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
4478  }
4479  case AMDGPUISD::DIV_SCALE:
4480  case AMDGPUISD::DIV_FMAS:
4481  case AMDGPUISD::DIV_FIXUP:
4482  case AMDGPUISD::TRIG_PREOP:
4483  // TODO: Refine on operands.
4484  return SNaN;
4485  case AMDGPUISD::SIN_HW:
4486  case AMDGPUISD::COS_HW: {
4487  // TODO: Need check for infinity
4488  return SNaN;
4489  }
4490  case ISD::INTRINSIC_WO_CHAIN: {
4491  unsigned IntrinsicID
4492  = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4493  // TODO: Handle more intrinsics
4494  switch (IntrinsicID) {
4496  return true;
4497 
4499  if (SNaN)
4500  return true;
4501  return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
4502  }
4504  if (SNaN)
4505  return true;
4506  return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
4507  DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
4508  }
4510  // TODO: Refine on operand
4511  return SNaN;
4512  default:
4513  return false;
4514  }
4515  }
4516  default:
4517  return false;
4518  }
4519 }
4520 
4523  if (RMW->getOperation() == AtomicRMWInst::Nand)
4526 }
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
uint64_t CallInst * C
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:571
bool isInvariant() const
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:538
SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const
SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array...
A parsed version of the target data layout string, and methods for querying it. ...
Definition: DataLayout.h:111
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:594
EVT getValueType() const
Return the ValueType of the referenced return value.
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool isInteger() const
Return true if this is an integer or a vector integer type.
SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const
Split a vector store into multiple scalar stores.
SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS, SDValue RHS, DAGCombinerInfo &DCI) const
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool shouldCombineMemoryType(EVT VT) const
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:35
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalValue &GV)
const GlobalValue * getGlobal() const
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.
Definition: ISDOpcodes.h:184
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1563
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI, unsigned Op, const SDLoc &SL, SDValue Cond, SDValue N1, SDValue N2)
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
unsigned getAddrSpace() const
Diagnostic information for unsupported feature in backend.
SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, unsigned Reg, EVT VT, const SDLoc &SL, bool RawReg=false) const
Helper function that adds Reg to the LiveIn list of the DAG's MachineFunction.
SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const
AMDGPU specific subclass of TargetSubtarget.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:937
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const
SDValue LowerFLOG(SDValue Op, SelectionDAG &DAG, double Log2BaseInverted) const
static bool allocateVGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtType, EVT ExtVT) const override
Return true if it is profitable to reduce a load to a smaller type.
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (a vector value) starting with the ...
Definition: ISDOpcodes.h:358
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
This class represents lattice values for constants.
Definition: AllocatorList.h:24
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types...
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:367
iterator begin() const
begin/end - Return all of the registers in this class.
SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:260
SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
bool isVector() const
Return true if this is a vector value type.
void addLiveIn(unsigned Reg, unsigned vreg=0)
addLiveIn - Add the specified register as a live-in.
const SDValue & getBasePtr() const
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:289
bool isNegative() const
Return true if the value is negative.
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:223
SDValue loadInputValue(SelectionDAG &DAG, const TargetRegisterClass *RC, EVT VT, const SDLoc &SL, const ArgDescriptor &Arg) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
This file describes how to lower LLVM calls to machine code calls.
const SDValue & getValue() const
SDVTList getVTList() const
unsigned Reg
static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:648
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
static SDValue getLog2EVal(SelectionDAG &DAG, const SDLoc &SL, EVT VT)
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
Address space for 32-bit constant memory.
Definition: AMDGPU.h:263
unsigned getVectorNumElements() const
const SDValue & getChain() const
Offsets
Offsets in bytes from the start of the input buffer.
Definition: SIInstrInfo.h:1025
Function Alias Analysis Results
unsigned getAlignment() const
static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:141
SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition: SelectionDAG.h:950
const fltSemantics & getSemantics() const
Definition: APFloat.h:1155
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:685
static SDValue extractF64Exponent(SDValue Hi, const SDLoc &SL, SelectionDAG &DAG)
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:384
const SDNodeFlags getFlags() const
an instruction that atomically reads a memory location, combines it with another value, and then stores the result back.
Definition: Instructions.h:692
MachineFunction & getMachineFunction() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:212
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
CLAMP value between 0.0 and 1.0.
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:45
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:435
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:39
SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const
EVT getPow2VectorType(LLVMContext &Context) const
Widens the length of the given vector EVT up to the nearest power of 2 and returns that type...
Definition: ValueTypes.h:366
bool mayIgnoreSignedZero(SDValue Op) const
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool hasVOP3PInsts() const
bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, unsigned AS) const override
Return true if it is expected to be cheaper to do a store of a non-zero vector constant with the give...
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:136
bool isCheapToSpeculateCttz() const override
Return true if it is cheap to speculate a call to intrinsic cttz.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
#define AMDGPU_LN2_F
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:210
#define LLVM_READNONE
Definition: Compiler.h:177
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:136
SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:192
static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG)
static const AMDGPUSubtarget & get(const MachineFunction &MF)
SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const
static uint32_t getAlignment(const MCSectionCOFF &Sec)
Pointer to the start of the shader's constant data.
Address space for constant memory (VTX2)
Definition: AMDGPU.h:259
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:198
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:371
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition: APInt.h:1447
SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
Shift and rotation operations.
Definition: ISDOpcodes.h:410
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:137
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244
bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const override
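If SNaN is false, returns true if Op is known to never be any NaN; if SNaN is true, returns true if Op is known to never be a signaling NaN.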
BinOp getOperation() const
Definition: Instructions.h:745
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:191
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:170
void addLoc(const CCValAssign &V)
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
SimpleValueType SimpleTy
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:304
static bool isI24(SDValue Op, SelectionDAG &DAG)
The memory access is dereferenceable (i.e., doesn't trap).
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:460
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:401
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
SDValue loadStackInputValue(SelectionDAG &DAG, EVT VT, const SDLoc &SL, int64_t Offset) const
Similar to CreateLiveInRegister, except value may be loaded from a stack slot rather than passed in a ...
SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const
Interface to describe a layout of a stack frame on an AMDGPU target.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:667
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SmallVector< ISD::InputArg, 32 > Ins
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
TargetRegisterInfo interface that is implemented by all hw codegen targets.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition: KnownBits.h:157
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
static bool allocateCCRegs(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, const TargetRegisterClass *RC, unsigned NumRegs)
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:298
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use...
Definition: CallingConv.h:221
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const
Split a vector load into 2 loads of half the vector.
bool isCheapToSpeculateCtlz() const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:398
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:478
#define AMDGPU_LN10_F
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:429
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
static bool hasVolatileUser(SDNode *Val)
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:852
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue is known to never be NaN.
std::pair< SDValue, SDValue > split64BitValue(SDValue Op, SelectionDAG &DAG) const
Return 64-bit value Op as two 32-bit integers.
static mvt_range integer_vector_valuetypes()
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:959
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:123
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors...
static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL, SDValue N0, SDValue N1, unsigned Size, bool Signed)
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
static SDValue simplifyI24(SDNode *Node24, TargetLowering::DAGCombinerInfo &DCI)
ArrayRef< SDUse > ops() const
amdgpu Simplify well known AMD library false Value * Callee
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:189
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:84
SDValue GetDemandedBits(SDValue V, const APInt &Mask)
See if the specified operand can be simplified with the knowledge that only the bits specified by Mas...
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const
This class is used to represent ISD::STORE nodes.
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:524
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
static LLVM_READNONE bool fnegFoldsIntoOp(unsigned Opc)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:327
static bool isCtlzOpc(unsigned Opc)
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool isNegative() const
Returns true if this value is known to be negative.
Definition: KnownBits.h:96
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:636
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits...
static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset, uint32_t Width, const SDLoc &DL)
SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const
bool isSDNodeAlwaysUniform(const SDNode *N) const override
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:610
SDValue LowerFROUND32_16(SDValue Op, SelectionDAG &DAG) const
bool hasFP32Denormals() const
const SDValue & getBasePtr() const
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:235
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
Simple binary floating point operators.
Definition: ISDOpcodes.h:283
bool hasFminFmaxLegacy() const
SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const
SDValue storeStackInputValue(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, SDValue ArgVal, int64_t Offset) const
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:273
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:66
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
const SDValue & getOperand(unsigned Num) const
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:934
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:247
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:232
LLVMContext & getContext() const
bool has16BitInsts() const
static unsigned inverseMinMax(unsigned Opc)
static bool isUniformMMO(const MachineMemOperand *MMO)
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
static bool hasDefinedInitializer(const GlobalValue *GV)
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const
This class provides iterator support for SDUse operands that use a specific SDNode.
Address space for local memory.
Definition: AMDGPU.h:260
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
KnownBits trunc(unsigned BitWidth)
Truncate the underlying known Zero and One bits.
Definition: KnownBits.h:113
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:216
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:767
static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
bool isLoadBitCastBeneficial(EVT, EVT) const final
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
The AMDGPU TargetMachine interface definition for hw codegen targets.
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:416
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo...
Definition: ISDOpcodes.h:796
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:556
Extended Value Type.
Definition: ValueTypes.h:34
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:256
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:195
bool isConstantCostlierToNegate(SDValue N) const
This structure contains all information that is necessary for lowering calls.
size_t size() const
Definition: SmallVector.h:53
bool isVolatile() const
const TargetMachine & getTargetMachine() const
This class contains a discriminated union of information about pointers in memory operands...
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override
Returns true if the target can instruction select the specified FP immediate natively.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
static bool isCttzOpc(unsigned Opc)
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)
Selects the correct CCAssignFn for a given CallingConvention value.
unsigned getAddressSpace() const
unsigned getStackOffset() const
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type...
SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, MachineFrameInfo &MFI, int ClobberedFI) const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static LLVM_READONLY bool hasSourceMods(const SDNode *N)
static bool isU24(SDValue Op, SelectionDAG &DAG)
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:120
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:404
SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const
unsigned getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:310
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
CCState - This class holds information needed while lowering arguments and return values...
SDValue scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
static const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:117
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:319
bool isFAbsFree(EVT VT) const override
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, unsigned Align=1, bool *=nullptr) const
Determine if the target supports unaligned memory accesses.
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:339
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:265
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part...
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:213
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:222
SDValue lowerUnhandledCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals, StringRef Reason) const
Interface definition of the TargetLowering class that is common to all AMD GPUs.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:734
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
const DebugLoc & getDebugLoc() const
SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:730
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values, following the IEEE-754 2008 definition.
Definition: ISDOpcodes.h:600
const DataFlowGraph & G
Definition: RDFGraph.cpp:211
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:413
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:581
This is an abstract virtual class for memory operations.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0)
Append the extracted elements from Start to Count out of the vector Op in Args.
Represents one node in the SelectionDAG.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
double BitsToDouble(uint64_t Bits)
This function takes a 64-bit integer and returns the bit equivalent double.
Definition: MathExtras.h:573
const Function & getFunction() const
Return the LLVM function that this machine code represents.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static mvt_range integer_valuetypes()
static bool allUsesHaveSourceMods(const SDNode *N, unsigned CostThreshold=4)
MachinePointerInfo getWithOffset(int64_t O) const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:539
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
#define AMDGPU_LOG2E_F
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT...
Definition: ValueTypes.h:73
EVT getMemoryVT() const
Return the type of the in-memory value.
SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Class for arbitrary precision integers.
Definition: APInt.h:70
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1223
iterator_range< use_iterator > uses()
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:420
Interface for the AMDGPU Implementation of the Intrinsic Info class.
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:468
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:471
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG)
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors...
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:312
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:607
amdgpu Simplify well known AMD library false Value Value * Arg
static SDValue stripBitcast(SDValue Val)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)
bool isConstantValueOfAnyType(SDValue N)
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:436
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:638
bool isLiveIn(unsigned Reg) const
Provides AMDGPU specific target descriptions.
SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const
SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const
Represents a use of a SDNode.
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
bool hasInv2PiInlineImm() const
Interface definition for SIInstrInfo.
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
const MachinePointerInfo & getPointerInfo() const
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:387
bool isNarrowingProfitable(EVT VT1, EVT VT2) const override
Return true if it's profitable to narrow operations of type VT1 to VT2.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:705
SelectSupportKind
Enum that describes what type of support for selects the target has.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:206
SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const
SDValue performMulLoHi24Combine(SDNode *N, DAGCombinerInfo &DCI) const
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:486
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:208
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:614
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
TargetOptions Options
Definition: TargetMachine.h:97
SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, unsigned Opc, SDValue LHS, uint32_t ValLo, uint32_t ValHi) const
Split the 64-bit value LHS into two 32-bit components, and perform the binary operation Opc to it wit...
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
Flags getFlags() const
Return the raw flags of the source value.
#define LLVM_READONLY
Definition: Compiler.h:184
The memory access always returns the same value (or traps).
bool isTruncateFree(EVT Src, EVT Dest) const override
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI)
#define NODE_NAME_CASE(node)
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
uint32_t Size
Definition: Profile.cpp:47
static bool isInv2Pi(const APFloat &APF)
unsigned getOpcode() const
SDValue getValue(unsigned R) const
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
static bool isNegativeOne(SDValue Val)
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:345
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
unsigned getRegister() const
SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
static LLVM_READONLY bool opMustUseVOP3Encoding(const SDNode *N, MVT VT)
Returns true if the operation will definitely need to use a 64-bit encoding, and thus will use a VOP3...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void print(raw_ostream &OS, const SelectionDAG *G=nullptr) const
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:284
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
LLVM Value Representation.
Definition: Value.h:73
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:302
SDValue getRegister(unsigned Reg, EVT VT)
Address space for region memory. (GDS)
Definition: AMDGPU.h:257
bool hasInitializer() const
Definitions have initializers, declarations don't.
bool bitwiseIsEqual(const APFloat &RHS) const
Definition: APFloat.h:1112
EVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type...
Definition: ValueTypes.h:115
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:962
SDValue getValueType(EVT)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
SDValue LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG, bool Signed) const
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone...
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT)
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:146
static SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI, SDValue N)
bool isZExtFree(Type *Src, Type *Dest) const override
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations...
Definition: ISDOpcodes.h:306
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:99
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:443
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const
Split a vector store into 2 stores of half the vector.
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
Conversion operators.
Definition: ISDOpcodes.h:465
const SDValue & getOperand(unsigned i) const
bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override
uint64_t getZExtValue() const
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:474
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Op, int64_t Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object...
Definition: SelectionDAG.h:806
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const
unsigned AllocateReg(unsigned Reg)
AllocateReg - Attempt to allocate one register.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all...
unsigned getLiveInVirtReg(unsigned PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual ...
bool ShouldShrinkFPConstant(EVT VT) const override
If true, then instruction selection should seek to shrink the FP constant of the specified type to a ...
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:584
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
MVT getVectorIdxTy(const DataLayout &) const override
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:131
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:201
unsigned getWavefrontSizeLog2() const
LLVMContext * getContext() const
Definition: SelectionDAG.h:407
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1442
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:242
iterator_range< arg_iterator > args()
Definition: Function.h:689
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT...
Definition: ValueTypes.h:328
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const
bool isSelectSupported(SelectSupportKind) const override
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:380
BRIND - Indirect branch.
Definition: ISDOpcodes.h:634
This class is used to represent ISD::LOAD nodes.
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary...
Definition: ISDOpcodes.h:623