//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include <cctype>

using namespace llvm;

#define DEBUG_TYPE "systemz-lower"

namespace {
// Represents information about a comparison.
struct Comparison {
  Comparison(SDValue Op0In, SDValue Op1In)
    : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}

  // The operands to the comparison.
  SDValue Op0, Op1;

  // The opcode that should be used to compare Op0 and Op1.
  unsigned Opcode;

  // A SystemZICMP value.  Only used for integer comparisons.
  unsigned ICmpType;

  // The mask of CC values that Opcode can produce.
  unsigned CCValid;

  // The mask of CC values for which the original condition is true.
  unsigned CCMask;
};
} // end anonymous namespace
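
// Illustrative example: an integer comparison instruction sets CC to
// 0 (equal), 1 (less) or 2 (greater), so for a signed "less than" test
// CCValid would cover CC values 0-2 while CCMask would select only CC 1;
// a branch on the original condition then tests CC against CCMask within
// CCValid.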

// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    llvm_unreachable("Unsupported type");
  }
}

// Return a version of MachineOperand that can be safely used before the
// final use.
static MachineOperand earlyUseOperand(MachineOperand Op) {
  if (Op.isReg())
    Op.setIsKill(false);
  return Op;
}

SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                                             const SystemZSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));

  // Set up the register classes.
  if (Subtarget.hasHighWord())
    addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
  else
    addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
  } else {
    addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
  }
  if (Subtarget.hasVectorEnhancements1())
    addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
  else
    addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
  }

  // Compute derived properties from the register classes
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Set up special registers.
  setStackPointerRegisterToSaveRestore(SystemZ::R15D);

  // TODO: It may be better to default to latency-oriented scheduling, however
  // LLVM's current latency-oriented scheduler can't handle physreg definitions
  // such as SystemZ has with CC, so set this to the register-pressure
  // scheduler, because it can.
  setSchedulingPreference(Sched::RegPressure);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(2);
  // For performance reasons we prefer 16-byte alignment.
  setPrefFunctionAlignment(4);

  // Handle operations that are handled in a similar way for all types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Lower SET_CC into an IPM-based sequence.
      setOperationAction(ISD::SETCC, VT, Custom);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC, VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // Handle integer types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Support addition/subtraction with overflow.
      setOperationAction(ISD::SADDO, VT, Custom);
      setOperationAction(ISD::SSUBO, VT, Custom);

      // Support addition/subtraction with carry.
      setOperationAction(ISD::UADDO, VT, Custom);
      setOperationAction(ISD::USUBO, VT, Custom);

      // Support carry in as value rather than glue.
      setOperationAction(ISD::ADDCARRY, VT, Custom);
      setOperationAction(ISD::SUBCARRY, VT, Custom);

      // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
      // stores, putting a serialization instruction after the stores.
      setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
      setOperationAction(ISD::ATOMIC_STORE, VT, Custom);

      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
      // available, or if the operand is constant.
      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);

      // Use POPCNT on z196 and above.
      if (Subtarget.hasPopulationCount())
        setOperationAction(ISD::CTPOP, VT, Custom);
      else
        setOperationAction(ISD::CTPOP, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Use *MUL_LOHI where possible instead of MULH*.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Custom);
      setOperationAction(ISD::UMUL_LOHI, VT, Custom);

      // Only z196 and above have native support for conversions to unsigned.
      // On z10, promoting to i64 doesn't generate an inexact condition for
      // values that are outside the i32 range but in the i64 range, so use
      // the default expansion.
      if (!Subtarget.hasFPExtension())
        setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    }
  }

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);

  // Even though i128 is not a legal type, we still need to custom lower
  // the atomic operations in order to exploit SystemZ instructions.
  setOperationAction(ISD::ATOMIC_LOAD,  MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);

  // We can use the CC result of compare-and-swap to implement
  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Traps are legal, as we will convert them to "j .+2".
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  if (!Subtarget.hasFPExtension()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
  }

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);

  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // FIXME: Can we support these natively?
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);

  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i1, Promote);
  }

  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool,     PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress,    PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress,     PtrVT, Custom);
  setOperationAction(ISD::JumpTable,        PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);

  // Use custom expanders so that we can force the function to use
  // a frame pointer.
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Handle prefetches with PFD or PFDRL.
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  for (MVT VT : MVT::vector_valuetypes()) {
    // Assume by default that all vector operations need to be expanded.
    for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
      if (getOperationAction(Opcode, VT) == Legal)
        setOperationAction(Opcode, VT, Expand);

    // Likewise all truncating stores and extending loads.
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    if (isTypeLegal(VT)) {
      // These operations are legal for anything that can be stored in a
      // vector register, even if there is no native support for the format
      // as such.  In particular, we can do these for v4f32 even though there
      // are no specific instructions for that format.
      setOperationAction(ISD::LOAD, VT, Legal);
      setOperationAction(ISD::STORE, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      // Likewise, except that we need to replace the nodes with something
      // more specific.
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  }

  // Handle integer vector types.
  for (MVT VT : MVT::integer_vector_valuetypes()) {
    if (isTypeLegal(VT)) {
      // These operations have direct equivalents.
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);
      if (VT != MVT::v2i64)
        setOperationAction(ISD::MUL, VT, Legal);
      setOperationAction(ISD::AND, VT, Legal);
      setOperationAction(ISD::OR, VT, Legal);
      setOperationAction(ISD::XOR, VT, Legal);
      if (Subtarget.hasVectorEnhancements1())
        setOperationAction(ISD::CTPOP, VT, Legal);
      else
        setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTTZ, VT, Legal);
      setOperationAction(ISD::CTLZ, VT, Legal);

      // Convert a GPR scalar to a vector by inserting it into element 0.
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

      // Use a series of unpacks for extensions.
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);

      // Detect shifts by a scalar amount and convert them into
      // V*_BY_SCALAR.
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
      setOperationAction(ISD::SRL, VT, Custom);

      // At present ROTL isn't matched by DAGCombiner.  ROTR should be
      // converted into ROTL.
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
      // and inverting the result as necessary.
      setOperationAction(ISD::SETCC, VT, Custom);
    }
  }

  if (Subtarget.hasVector()) {
    // There should be no need to check for float types other than v2f64
    // since <2 x f32> isn't a legal type.
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
  }

  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // We can use the extended form of FI for other rounding operations.
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::FNEARBYINT, VT, Legal);
        setOperationAction(ISD::FFLOOR, VT, Legal);
        setOperationAction(ISD::FCEIL, VT, Legal);
        setOperationAction(ISD::FTRUNC, VT, Legal);
        setOperationAction(ISD::FROUND, VT, Legal);
      }

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
    }
  }

  // Handle floating-point vector types.
  if (Subtarget.hasVector()) {
    // Scalar-to-vector conversion is just a subreg.
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

    // Some insertions and extractions can be done directly but others
    // need to go via integers.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    // These operations have direct equivalents.
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FABS, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
  }

  // The vector enhancements facility 1 has instructions for these.
  if (Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FABS, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
  }

  // We have fused multiply-addition for f32 and f64, and for f128 only
  // with the vector enhancements facility 1.
  setOperationAction(ISD::FMA, MVT::f32, Legal);
  setOperationAction(ISD::FMA, MVT::f64, Legal);
  if (Subtarget.hasVectorEnhancements1())
    setOperationAction(ISD::FMA, MVT::f128, Legal);
  else
    setOperationAction(ISD::FMA, MVT::f128, Expand);

  // We don't have a copysign instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1())
    setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of an f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);

  // We don't have an extending load instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
  }

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64,  MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  if (!Subtarget.hasVector()) {
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
  }

  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY,  MVT::Other, Custom);
  setOperationAction(ISD::VAEND,   MVT::Other, Expand);

  // Codes for which we want to perform some z-specific combinations.

  // Handle intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = 0;
  MaxStoresPerMemcpyOptSize = 0;

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't when the byte value is
  // variable.  E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC".  Handle the choice in target-specific code instead.
  MaxStoresPerMemset = 0;
  MaxStoresPerMemsetOptSize = 0;
}

EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
                                              LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return Subtarget.hasVectorEnhancements1();
  default:
    break;
  }

  return false;
}

bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  return Imm.isZero() || Imm.isNegZero();
}

bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // We can use CGFI or CLGFI.
  return isInt<32>(Imm) || isUInt<32>(Imm);
}

bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  // We can use ALGFI or SLGFI.
  return isUInt<32>(Imm) || isUInt<32>(-Imm);
}
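
// For instance, adding the immediate -1 is legal even though -1 itself is
// not a 32-bit unsigned value: -(-1) == 1 does fit in 32 bits, so the
// addition can instead be done as a subtraction of 1 using SLGFI.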

bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                           unsigned,
                                                           unsigned,
                                                           bool *Fast) const {
  // Unaligned accesses should never be slower than the expanded version.
  // We check specifically for aligned accesses in the few cases where
  // they are required.
  if (Fast)
    *Fast = true;
  return true;
}

// Information about the addressing mode for a memory access.
struct AddressingMode {
  // True if a long displacement is supported.
  bool LongDisplacement;

  // True if use of index register is supported.
  bool IndexReg;

  AddressingMode(bool LongDispl, bool IdxReg) :
    LongDisplacement(LongDispl), IndexReg(IdxReg) {}
};

// Return the desired addressing mode for a Load which has only one use (in
// the same block) which is a Store.
static AddressingMode getLoadStoreAddrMode(bool HasVector,
                                           Type *Ty) {
  // With vector support a Load->Store combination may be combined to either
  // an MVC or vector operations and it seems to work best to allow the
  // vector addressing mode.
  if (HasVector)
    return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);

  // Otherwise only the MVC case is special.
  bool MVC = Ty->isIntegerTy(8);
  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
}
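
// Illustrative example: without vector support, an i8 load whose only user
// is a store is an MVC candidate, so neither a long displacement nor an
// index register is allowed; loads of wider types keep the full mode.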

// Return the addressing mode which seems most desirable given an LLVM
// Instruction pointer.
static AddressingMode
supportedAddressingMode(Instruction *I, bool HasVector) {
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::memset:
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
    }
  }

  if (isa<LoadInst>(I) && I->hasOneUse()) {
    auto *SingleUser = dyn_cast<Instruction>(*I->user_begin());
    if (SingleUser->getParent() == I->getParent()) {
      if (isa<ICmpInst>(SingleUser)) {
        if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
          if (C->getBitWidth() <= 64 &&
              (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
            // Comparison of memory with 16 bit signed / unsigned immediate
            return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
      } else if (isa<StoreInst>(SingleUser))
        // Load->Store
        return getLoadStoreAddrMode(HasVector, I->getType());
    }
  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
    if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
      if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
        // Load->Store
        return getLoadStoreAddrMode(HasVector, LoadI->getType());
  }

  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {

    // * Use LDE instead of LE/LEY for z13 to avoid partial register
    //   dependencies (LDE only supports small offsets).
    // * Utilize the vector registers to hold floating point
    //   values (vector load / store instructions only support small
    //   offsets).

    Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
                         I->getOperand(0)->getType());
    bool IsFPAccess = MemAccessTy->isFloatingPointTy();
    bool IsVectorAccess = MemAccessTy->isVectorTy();

    // A store of an extracted vector element will be combined into a VSTE type
    // instruction.
    if (!IsVectorAccess && isa<StoreInst>(I)) {
      Value *DataOp = I->getOperand(0);
      if (isa<ExtractElementInst>(DataOp))
        IsVectorAccess = true;
    }

    // A load which gets inserted into a vector element will be combined into a
    // VLE type instruction.
    if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
      User *LoadUser = *I->user_begin();
      if (isa<InsertElementInst>(LoadUser))
        IsVectorAccess = true;
    }

    if (IsFPAccess || IsVectorAccess)
      return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
  }

  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
}

bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
       const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
  // Punt on globals for now, although they can be used in limited
  // RELATIVE LONG cases.
  if (AM.BaseGV)
    return false;

  // Require a 20-bit signed offset.
  if (!isInt<20>(AM.BaseOffs))
    return false;

  AddressingMode SupportedAM(true, true);
  if (I != nullptr)
    SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());

  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
    return false;

  if (!SupportedAM.IndexReg)
    // No indexing allowed.
    return AM.Scale == 0;
  else
    // Indexing is OK but no scale factor can be applied.
    return AM.Scale == 0 || AM.Scale == 1;
}
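
// Illustrative examples: base + 100000 with no index is accepted when a
// long (20-bit signed) displacement is supported, base + 5000 is rejected
// for an access limited to the 12-bit unsigned displacement field, and a
// Scale of 2 or more is always rejected because z/Architecture has no
// scaled indexing.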

bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
    return false;
  unsigned FromBits = FromType->getPrimitiveSizeInBits();
  unsigned ToBits = ToType->getPrimitiveSizeInBits();
  return FromBits > ToBits;
}

bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
  if (!FromVT.isInteger() || !ToVT.isInteger())
    return false;
  unsigned FromBits = FromVT.getSizeInBits();
  unsigned ToBits = ToVT.getSizeInBits();
  return FromBits > ToBits;
}

//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Address register
    case 'd': // Data register (equivalent to 'r')
    case 'f': // Floating-point register
    case 'h': // High-part register
    case 'r': // General-purpose register
    case 'v': // Vector register
      return C_RegisterClass;

    case 'Q': // Memory with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Memory with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
    case 'm': // Equivalent to 'T'.
      return C_Memory;

    case 'I': // Unsigned 8-bit constant
    case 'J': // Unsigned 12-bit constant
    case 'K': // Signed 16-bit constant
    case 'L': // Signed 20-bit displacement (on all targets we support)
    case 'M': // 0x7fffffff
      return C_Other;

    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}
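
// Illustrative use of a constant constraint: the 'K' letter accepts any
// signed 16-bit value, matching the immediate field of instructions such
// as AHI, e.g.
//   int x; asm("ahi %0,%1" : "+d"(x) : "K"(42));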

TargetLowering::ConstraintWeight SystemZTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
                               const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;

  case 'a': // Address register
  case 'd': // Data register (equivalent to 'r')
  case 'h': // High-part register
  case 'r': // General-purpose register
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;

  case 'f': // Floating-point register
    if (type->isFloatingPointTy())
      weight = CW_Register;
    break;

  case 'v': // Vector register
    if ((type->isVectorTy() || type->isFloatingPointTy()) &&
        Subtarget.hasVector())
      weight = CW_Register;
    break;

  case 'I': // Unsigned 8-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<8>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'J': // Unsigned 12-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<12>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'K': // Signed 16-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<16>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'L': // Signed 20-bit displacement (on all targets we support)
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<20>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'M': // 0x7fffffff
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (C->getZExtValue() == 0x7fffffff)
        weight = CW_Constant;
    break;
  }
  return weight;
}

// Parse a "{tNNN}" register constraint for which the register type "t"
// has already been verified.  RC is the class associated with "t" and
// Map maps 0-based register numbers to LLVM register numbers.
static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
                    const unsigned *Map, unsigned Size) {
  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
  if (isdigit(Constraint[2])) {
    unsigned Index;
    bool Failed =
      Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
    if (!Failed && Index < Size && Map[Index])
      return std::make_pair(Map[Index], RC);
  }
  return std::make_pair(0U, nullptr);
}
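
// For example (illustrative), given the constraint "{r5}" with VT == i64,
// the caller passes SystemZMC::GR64Regs, and index 5 resolves to the
// 64-bit register R5D in GR64BitRegClass.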

std::pair<unsigned, const TargetRegisterClass *>
SystemZTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'd': // Data register (equivalent to 'r')
    case 'r': // General-purpose register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::GR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::GR128BitRegClass);
      return std::make_pair(0U, &SystemZ::GR32BitRegClass);

    case 'a': // Address register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
      return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);

    case 'h': // High-part register (an LLVM extension)
      return std::make_pair(0U, &SystemZ::GRH32BitRegClass);

    case 'f': // Floating-point register
      if (VT == MVT::f64)
        return std::make_pair(0U, &SystemZ::FP64BitRegClass);
      else if (VT == MVT::f128)
        return std::make_pair(0U, &SystemZ::FP128BitRegClass);
      return std::make_pair(0U, &SystemZ::FP32BitRegClass);

    case 'v': // Vector register
      if (Subtarget.hasVector()) {
        if (VT == MVT::f32)
          return std::make_pair(0U, &SystemZ::VR32BitRegClass);
        if (VT == MVT::f64)
          return std::make_pair(0U, &SystemZ::VR64BitRegClass);
        return std::make_pair(0U, &SystemZ::VR128BitRegClass);
      }
      break;
    }
  }
  if (Constraint.size() > 0 && Constraint[0] == '{') {
    // We need to override the default register parsing for GPRs and FPRs
    // because the interpretation depends on VT.  The internal names of
    // the registers are also different from the external names
    // (F0D and F0S instead of F0, etc.).
    if (Constraint[1] == 'r') {
      if (VT == MVT::i32)
        return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
                                   SystemZMC::GR32Regs, 16);
      if (VT == MVT::i128)
        return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
                                   SystemZMC::GR128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
                                 SystemZMC::GR64Regs, 16);
    }
    if (Constraint[1] == 'f') {
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
                                   SystemZMC::FP32Regs, 16);
      if (VT == MVT::f128)
        return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
                                   SystemZMC::FP128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
                                 SystemZMC::FP64Regs, 16);
    }
    if (Constraint[1] == 'v') {
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
                                   SystemZMC::VR32Regs, 32);
      if (VT == MVT::f64)
        return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
                                   SystemZMC::VR64Regs, 32);
      return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
                                 SystemZMC::VR128Regs, 32);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                             std::vector<SDValue> &Ops,
                             SelectionDAG &DAG) const {
  // Only support length 1 constraints for now.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I': // Unsigned 8-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<8>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'J': // Unsigned 12-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<12>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'K': // Signed 16-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<16>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'L': // Signed 20-bit displacement (on all targets we support)
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<20>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'M': // 0x7fffffff
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0x7fffffff)
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

//===----------------------------------------------------------------------===//
// Calling conventions
//===----------------------------------------------------------------------===//

#include "SystemZGenCallingConv.inc"

const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
                   CallingConv::ID) const {
  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
                                           SystemZ::R14D, 0 };
  return ScratchRegs;
}

bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
                                                     Type *ToType) const {
  return isTruncateFree(FromType, ToType);
}

bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

// We do not yet support 128-bit single-element vector types.  If the user
// attempts to use such types as function argument or return type, prefer
// to error out instead of emitting code violating the ABI.
static void VerifyVectorType(MVT VT, EVT ArgVT) {
  if (ArgVT.isVector() && !VT.isVector())
    report_fatal_error("Unsupported vector argument or return type");
}

static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
  for (unsigned i = 0; i < Ins.size(); ++i)
    VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
}

static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
  for (unsigned i = 0; i < Outs.size(); ++i)
    VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
}

// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Chain,
                                   SDValue Value) {
  // If the argument has been promoted from a smaller type, insert an
  // assertion to capture this.
  if (VA.getLocInfo() == CCValAssign::SExt)
    Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));
  else if (VA.getLocInfo() == CCValAssign::ZExt)
    Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));

  if (VA.isExtInLoc())
    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
  else if (VA.getLocInfo() == CCValAssign::BCvt) {
    // If this is a short vector argument loaded from the stack,
    // extend from i64 to full vector size and then bitcast.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
    Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
  } else
    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
  return Value;
}

// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA.  Return a copy of Value converted to
// VA.getLocVT().  The caller is responsible for handling indirect values.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Value) {
  switch (VA.getLocInfo()) {
  case CCValAssign::SExt:
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::ZExt:
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::BCvt:
    // If this is a short vector argument to be stored to the stack,
    // bitcast to v2i64 and then extract first element.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
                       DAG.getConstant(0, DL, MVT::i32));
  case CCValAssign::Full:
    return Value;
  default:
    llvm_unreachable("Unhandled getLocInfo()");
  }
}

SDValue SystemZTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();
  auto *TFL =
      static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Detect unsupported vector argument types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Ins);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);

  unsigned NumFixedGPRs = 0;
  unsigned NumFixedFPRs = 0;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    SDValue ArgValue;
    CCValAssign &VA = ArgLocs[I];
    EVT LocVT = VA.getLocVT();
    if (VA.isRegLoc()) {
      // Arguments passed in registers
      const TargetRegisterClass *RC;
      switch (LocVT.getSimpleVT().SimpleTy) {
      default:
        // Integers smaller than i64 should be promoted to i64.
        llvm_unreachable("Unexpected argument type");
      case MVT::i32:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR32BitRegClass;
        break;
      case MVT::i64:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR64BitRegClass;
        break;
      case MVT::f32:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP32BitRegClass;
        break;
      case MVT::f64:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        RC = &SystemZ::VR128BitRegClass;
        break;
      }

      unsigned VReg = MRI.createVirtualRegister(RC);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Create the frame index object for this incoming parameter.
      int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
                                     VA.getLocMemOffset(), true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter.  Unpromoted ints and floats are
      // passed as right-justified 8-byte values.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                          DAG.getIntPtrConstant(4, DL));
      ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
                             MachinePointerInfo::getFixedStack(MF, FI));
    }

    // Convert the value of the argument register into the value that's
    // being passed.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      // If the original argument was split (e.g. i128), we need
      // to load all parts of it here (using the same address).
      unsigned ArgIndex = Ins[I].OrigArgIndex;
      assert (Ins[I].PartOffset == 0);
      while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[I + 1];
        unsigned PartOffset = Ins[I + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++I;
      }
    } else
      InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }

  if (IsVarArg) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be.  The 1-byte size here is arbitrary.
    int64_t StackSize = CCInfo.getNextStackOffset();
    FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));

    // ...and a similar frame index for the caller-allocated save area
    // that will be used to store the incoming registers.
    int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
    unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);

    // Store the FPR varargs in the reserved frame slots.  (We store the
    // GPRs as part of the prologue.)
    if (NumFixedFPRs < SystemZ::NumArgFPRs) {
      SDValue MemOps[SystemZ::NumArgFPRs];
      for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
        unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
        int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true);
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
        unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
                                     &SystemZ::FP64BitRegClass);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
        MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
                                 MachinePointerInfo::getFixedStack(MF, FI));
      }
      // Join the stores, which are independent of one another.
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          makeArrayRef(&MemOps[NumFixedFPRs],
                                       SystemZ::NumArgFPRs-NumFixedFPRs));
    }
  }

  return Chain;
}

static bool canUseSiblingCall(const CCState &ArgCCInfo,
                              SmallVectorImpl<CCValAssign> &ArgLocs,
                              SmallVectorImpl<ISD::OutputArg> &Outs) {
  // Punt if there are any indirect or stack arguments, or if the call
  // needs the callee-saved argument register R6, or if the call uses
  // the callee-saved register arguments SwiftSelf and SwiftError.
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;
    if (!VA.isRegLoc())
      return false;
    unsigned Reg = VA.getLocReg();
    if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
      return false;
    if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
      return false;
  }
  return true;
}

SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Detect unsupported vector argument and return types.
  if (Subtarget.hasVector()) {
    VerifyVectorTypes(Outs);
    VerifyVectorTypes(Ins);
  }

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);

  // We don't support GuaranteedTailCallOpt, only automatically-detected
  // sibling calls.
  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
    IsTailCall = false;

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Mark the start of the call.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue ArgValue = OutVals[I];

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[I].OrigArgIndex;
      assert (Outs[I].PartOffset == 0);
      while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[I + 1];
        unsigned PartOffset = Outs[I + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI, PartOffset)));
        ++I;
      }
      ArgValue = SpillSlot;
    } else
      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);

    if (VA.isRegLoc())
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Work out the address of the stack slot.  Unpromoted ints and
      // floats are passed as right-justified 8-byte values.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
      unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        Offset += 4;
      SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                                    DAG.getIntPtrConstant(Offset, DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Accept direct calls by converting symbolic call addresses to the
  // associated Target* opcodes.  Force %r1 to be used for indirect
  // tail calls.
  SDValue Glue;
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (IsTailCall) {
    Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
    Glue = Chain.getValue(1);
    Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
  }

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
                             RegsToPass[I].second, Glue);
    Glue = Chain.getValue(1);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
                                  RegsToPass[I].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (IsTailCall)
    return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 8> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];

    // Copy the value out, gluing the copy to the end of the call sequence.
    SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
                                          VA.getLocVT(), Glue);
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // Convert the value of the return register into the value that's
    // being returned.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
  }

  return Chain;
}

bool SystemZTargetLowering::
CanLowerReturn(CallingConv::ID CallConv,
               MachineFunction &MF, bool isVarArg,
               const SmallVectorImpl<ISD::OutputArg> &Outs,
               LLVMContext &Context) const {
  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  // Special case that we cannot easily detect in RetCC_SystemZ since
  // i128 is not a legal type.
  for (auto &Out : Outs)
    if (Out.ArgVT == MVT::i128)
      return false;

  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
}

SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                   bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   const SDLoc &DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  // Assign locations to each returned value.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);

  // Quick exit for void returns
  if (RetLocs.empty())
    return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);

  // Copy the result values into the output registers.
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain);
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];
    SDValue RetValue = OutVals[I];

    // Make the return register live on exit.
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Promote the value as required.
    RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);

    // Chain and glue the copies together.
    unsigned Reg = VA.getLocReg();
    Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
}

// Return true if Op is an intrinsic node with chain that returns the CC value
// as its only (other) argument.  Provide the associated SystemZISD opcode and
// the mask of valid CC values if so.
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
                                      unsigned &CCValid) {
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_tbegin:
    Opcode = SystemZISD::TBEGIN;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tbegin_nofloat:
    Opcode = SystemZISD::TBEGIN_NOFLOAT;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tend:
    Opcode = SystemZISD::TEND;
    CCValid = SystemZ::CCMASK_TEND;
    return true;

  default:
    return false;
  }
}

// Return true if Op is an intrinsic node without chain that returns the
// CC value as its final argument.  Provide the associated SystemZISD
// opcode and the mask of valid CC values if so.
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_vpkshs:
  case Intrinsic::s390_vpksfs:
  case Intrinsic::s390_vpksgs:
    Opcode = SystemZISD::PACKS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vpklshs:
  case Intrinsic::s390_vpklsfs:
  case Intrinsic::s390_vpklsgs:
    Opcode = SystemZISD::PACKLS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vceqbs:
  case Intrinsic::s390_vceqhs:
  case Intrinsic::s390_vceqfs:
  case Intrinsic::s390_vceqgs:
    Opcode = SystemZISD::VICMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchbs:
  case Intrinsic::s390_vchhs:
  case Intrinsic::s390_vchfs:
  case Intrinsic::s390_vchgs:
    Opcode = SystemZISD::VICMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchlbs:
  case Intrinsic::s390_vchlhs:
  case Intrinsic::s390_vchlfs:
  case Intrinsic::s390_vchlgs:
    Opcode = SystemZISD::VICMPHLS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vtm:
    Opcode = SystemZISD::VTM;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfaebs:
  case Intrinsic::s390_vfaehs:
  case Intrinsic::s390_vfaefs:
    Opcode = SystemZISD::VFAE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfaezbs:
  case Intrinsic::s390_vfaezhs:
  case Intrinsic::s390_vfaezfs:
    Opcode = SystemZISD::VFAEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeebs:
  case Intrinsic::s390_vfeehs:
  case Intrinsic::s390_vfeefs:
    Opcode = SystemZISD::VFEE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeezbs:
  case Intrinsic::s390_vfeezhs:
  case Intrinsic::s390_vfeezfs:
    Opcode = SystemZISD::VFEEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenebs:
  case Intrinsic::s390_vfenehs:
  case Intrinsic::s390_vfenefs:
    Opcode = SystemZISD::VFENE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenezbs:
  case Intrinsic::s390_vfenezhs:
  case Intrinsic::s390_vfenezfs:
    Opcode = SystemZISD::VFENEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vistrbs:
  case Intrinsic::s390_vistrhs:
  case Intrinsic::s390_vistrfs:
    Opcode = SystemZISD::VISTR_CC;
    CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
    return true;

  case Intrinsic::s390_vstrcbs:
  case Intrinsic::s390_vstrchs:
  case Intrinsic::s390_vstrcfs:
    Opcode = SystemZISD::VSTRC_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrczbs:
  case Intrinsic::s390_vstrczhs:
  case Intrinsic::s390_vstrczfs:
    Opcode = SystemZISD::VSTRCZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfcedbs:
  case Intrinsic::s390_vfcesbs:
    Opcode = SystemZISD::VFCMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchdbs:
  case Intrinsic::s390_vfchsbs:
    Opcode = SystemZISD::VFCMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchedbs:
  case Intrinsic::s390_vfchesbs:
    Opcode = SystemZISD::VFCMPHES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vftcidb:
  case Intrinsic::s390_vftcisb:
    Opcode = SystemZISD::VFTCI;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_tdc:
    Opcode = SystemZISD::TDC;
    CCValid = SystemZ::CCMASK_TDC;
    return true;

  default:
    return false;
  }
}

// Emit an intrinsic with chain and an explicit CC register result.
static SDNode *emitIntrinsicWithCCAndChain(SDValue Op, SelectionDAG &DAG,
                                           unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  Ops.push_back(Op.getOperand(0));
  for (unsigned I = 2; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
  SDValue OldChain = SDValue(Op.getNode(), 1);
  SDValue NewChain = SDValue(Intr.getNode(), 1);
  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
  return Intr.getNode();
}

// Emit an intrinsic with an explicit CC register result.
static SDNode *emitIntrinsicWithCC(SDValue Op, SelectionDAG &DAG,
                                   unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  for (unsigned I = 1; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
  return Intr.getNode();
}

// CC is a comparison that will be implemented using an integer or
// floating-point comparison.  Return the condition code mask for
// a branch on true.  In the integer case, CCMASK_CMP_UO is set for
// unsigned comparisons and clear for signed ones.  In the floating-point
// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
static unsigned CCMaskForCondCode(ISD::CondCode CC) {
#define CONV(X) \
  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X

  switch (CC) {
  default:
    llvm_unreachable("Invalid integer condition!");

  CONV(EQ);
  CONV(NE);
  CONV(GT);
  CONV(GE);
  CONV(LT);
  CONV(LE);

  case ISD::SETO:  return SystemZ::CCMASK_CMP_O;
  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
  }
#undef CONV
}
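
// For example, CONV(GT) expands so that ISD::SETGT and ISD::SETOGT both map
// to CCMASK_CMP_GT, while ISD::SETUGT maps to CCMASK_CMP_UO | CCMASK_CMP_GT:
// unordered-or-greater for a floating-point compare, and plain greater-than
// for an integer compare, where the UO bit simply marks it as unsigned.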

// If C can be converted to a comparison against zero, adjust the operands
// as necessary.
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
  if (C.ICmpType == SystemZICMP::UnsignedOnly)
    return;

  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
  if (!ConstOp1)
    return;

  int64_t Value = ConstOp1->getSExtValue();
  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
      (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
    C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
  }
}
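
// For instance, a signed "x > -1" becomes "x >= 0": toggling CCMASK_CMP_EQ
// turns the greater-than mask into greater-or-equal, and replacing -1 with 0
// lets the comparison be implemented as a load-and-test.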

// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
// adjust the operands as necessary.
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
                             Comparison &C) {
  // For us to make any changes, it must be a comparison between a single-use
  // load and a constant.
  if (!C.Op0.hasOneUse() ||
      C.Op0.getOpcode() != ISD::LOAD ||
      C.Op1.getOpcode() != ISD::Constant)
    return;

  // We must have an 8- or 16-bit load.
  auto *Load = cast<LoadSDNode>(C.Op0);
  unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
  if (NumBits != 8 && NumBits != 16)
    return;

  // The load must be an extending one and the constant must be within the
  // range of the unextended value.
  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
  uint64_t Value = ConstOp1->getZExtValue();
  uint64_t Mask = (1 << NumBits) - 1;
  if (Load->getExtensionType() == ISD::SEXTLOAD) {
    // Make sure that ConstOp1 is in range of C.Op0.
    int64_t SignedValue = ConstOp1->getSExtValue();
    if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
      return;
    if (C.ICmpType != SystemZICMP::SignedOnly) {
      // Unsigned comparison between two sign-extended values is equivalent
      // to unsigned comparison between two zero-extended values.
      Value &= Mask;
    } else if (NumBits == 8) {
      // Try to treat the comparison as unsigned, so that we can use CLI.
      // Adjust CCMask and Value as necessary.
      if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
        // Test whether the high bit of the byte is set.
        Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
      else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
        // Test whether the high bit of the byte is clear.
        Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
      else
        // No instruction exists for this combination.
        return;
      C.ICmpType = SystemZICMP::UnsignedOnly;
    }
  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
    if (Value > Mask)
      return;
    // If the constant is in range, we can use any comparison.
    C.ICmpType = SystemZICMP::Any;
  } else
    return;

  // Make sure that the first operand is an i32 of the right extension type.
  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
                              ISD::SEXTLOAD :
                              ISD::ZEXTLOAD);
  if (C.Op0.getValueType() != MVT::i32 ||
      Load->getExtensionType() != ExtType) {
    C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
                           Load->getBasePtr(), Load->getPointerInfo(),
                           Load->getMemoryVT(), Load->getAlignment(),
                           Load->getMemOperand()->getFlags());
    // Update the chain uses.
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
  }

  // Make sure that the second operand is an i32 with the right value.
  if (C.Op1.getValueType() != MVT::i32 ||
      Value != ConstOp1->getZExtValue())
    C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
}
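
// For example, a zero-extending i8 load compared unsigned against 200 can
// be done directly on the memory byte with CLI, so C.ICmpType is relaxed to
// SystemZICMP::Any above and the constant is kept as an i32.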

// Return true if Op is either an unextended load, or a load suitable
// for integer register-memory comparisons of type ICmpType.
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
  if (Load) {
    // There are no instructions to compare a register with a memory byte.
    if (Load->getMemoryVT() == MVT::i8)
      return false;
    // Otherwise decide on extension type.
    switch (Load->getExtensionType()) {
    case ISD::NON_EXTLOAD:
      return true;
    case ISD::SEXTLOAD:
      return ICmpType != SystemZICMP::UnsignedOnly;
    case ISD::ZEXTLOAD:
      return ICmpType != SystemZICMP::SignedOnly;
    default:
      break;
    }
  }
  return false;
}

// Return true if it is better to swap the operands of C.
static bool shouldSwapCmpOperands(const Comparison &C) {
  // Leave f128 comparisons alone, since they have no memory forms.
  if (C.Op0.getValueType() == MVT::f128)
    return false;

  // Always keep a floating-point constant second, since comparisons with
  // zero can use LOAD TEST and comparisons with other constants make a
  // natural memory operand.
  if (isa<ConstantFPSDNode>(C.Op1))
    return false;

  // Never swap comparisons with zero since there are many ways to optimize
  // those later.
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
    return false;

  // Also keep natural memory operands second if the loaded value is
  // only used here.  Several comparisons have memory forms.
  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
    return false;

  // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
  // In that case we generally prefer the memory to be second.
  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
    // The only exceptions are when the second operand is a constant and
    // we can use things like CHHSI.
    if (!ConstOp1)
      return true;
    // The unsigned memory-immediate instructions can handle 16-bit
    // unsigned integers.
    if (C.ICmpType != SystemZICMP::SignedOnly &&
        isUInt<16>(ConstOp1->getZExtValue()))
      return false;
    // The signed memory-immediate instructions can handle 16-bit
    // signed integers.
    if (C.ICmpType != SystemZICMP::UnsignedOnly &&
        isInt<16>(ConstOp1->getSExtValue()))
      return false;
    return true;
  }

  // Try to promote the use of CGFR and CLGFR.
  unsigned Opcode0 = C.Op0.getOpcode();
  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly &&
      Opcode0 == ISD::AND &&
      C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
    return true;

  return false;
}
1914 
1915 // Return a version of comparison CC mask CCMask in which the LT and GT
1916 // actions are swapped.
1917 static unsigned reverseCCMask(unsigned CCMask) {
1918  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
1919  (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
1920  (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
1921  (CCMask & SystemZ::CCMASK_CMP_UO));
1922 }
1923 
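// Added editorial sketch (not part of the original source): a standalone
// model of reverseCCMask above, assuming the CC-mask encodings from
// SystemZ.h (CCMASK_CMP_EQ = 8, CCMASK_CMP_LT = 4, CCMASK_CMP_GT = 2,
// CCMASK_CMP_UO = 1). Swapping LT and GT turns "less than or equal" into
// "greater than or equal" while leaving EQ and UO untouched.
constexpr unsigned kEQ = 8, kLT = 4, kGT = 2, kUO = 1;
constexpr unsigned reverseModel(unsigned M) {
  return (M & kEQ) | (M & kLT ? kGT : 0u) | (M & kGT ? kLT : 0u) | (M & kUO);
}
static_assert(reverseModel(kEQ | kLT) == (kEQ | kGT), "LE becomes GE");
static_assert(reverseModel(kGT | kUO) == (kLT | kUO), "EQ/UO bits unchanged");
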
1924 // Check whether C tests for equality between X and Y and whether X - Y
1925 // or Y - X is also computed. In that case it's better to compare the
1926 // result of the subtraction against zero.
1927 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
1928  Comparison &C) {
1929  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
1930  C.CCMask == SystemZ::CCMASK_CMP_NE) {
1931  for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
1932  SDNode *N = *I;
1933  if (N->getOpcode() == ISD::SUB &&
1934  ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
1935  (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
1936  C.Op0 = SDValue(N, 0);
1937  C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
1938  return;
1939  }
1940  }
1941  }
1942 }
1943 
1944 // Check whether C compares a floating-point value with zero and if that
1945 // floating-point value is also negated. In this case we can use the
1946 // negation to set CC, so avoiding separate LOAD AND TEST and
1947 // LOAD (NEGATIVE/COMPLEMENT) instructions.
1948 static void adjustForFNeg(Comparison &C) {
1949  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
1950  if (C1 && C1->isZero()) {
1951  for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
1952  SDNode *N = *I;
1953  if (N->getOpcode() == ISD::FNEG) {
1954  C.Op0 = SDValue(N, 0);
1955  C.CCMask = reverseCCMask(C.CCMask);
1956  return;
1957  }
1958  }
1959  }
1960 }
1961 
1962 // Check whether C compares (shl X, 32) with 0 and whether X is
1963 // also sign-extended. In that case it is better to test the result
1964 // of the sign extension using LTGFR.
1965 //
1966 // This case is important because InstCombine transforms a comparison
1967 // with (sext (trunc X)) into a comparison with (shl X, 32).
1968 static void adjustForLTGFR(Comparison &C) {
1969  // Check for a comparison between (shl X, 32) and 0.
1970  if (C.Op0.getOpcode() == ISD::SHL &&
1971  C.Op0.getValueType() == MVT::i64 &&
1972  C.Op1.getOpcode() == ISD::Constant &&
1973  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
1974  auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
1975  if (C1 && C1->getZExtValue() == 32) {
1976  SDValue ShlOp0 = C.Op0.getOperand(0);
1977  // See whether X has any SIGN_EXTEND_INREG uses.
1978  for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
1979  SDNode *N = *I;
1980  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
1981  cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
1982  C.Op0 = SDValue(N, 0);
1983  return;
1984  }
1985  }
1986  }
1987  }
1988 }
1989 
1990 // If C compares the truncation of an extending load, try to compare
1991 // the untruncated value instead. This exposes more opportunities to
1992 // reuse CC.
1993 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
1994  Comparison &C) {
1995  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
1996  C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
1997  C.Op1.getOpcode() == ISD::Constant &&
1998  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
1999  auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2000  if (L->getMemoryVT().getStoreSizeInBits() <= C.Op0.getValueSizeInBits()) {
2001  unsigned Type = L->getExtensionType();
2002  if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2003  (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2004  C.Op0 = C.Op0.getOperand(0);
2005  C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2006  }
2007  }
2008  }
2009 }
2010 
2011 // Return true if shift operation N has an in-range constant shift value.
2012 // Store it in ShiftVal if so.
2013 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2014  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2015  if (!Shift)
2016  return false;
2017 
2018  uint64_t Amount = Shift->getZExtValue();
2019  if (Amount >= N.getValueSizeInBits())
2020  return false;
2021 
2022  ShiftVal = Amount;
2023  return true;
2024 }
2025 
2026 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
2027 // instruction and whether the CC value is descriptive enough to handle
2028 // a comparison of type Opcode between the AND result and CmpVal.
2029 // CCMask says which comparison result is being tested and BitSize is
2030 // the number of bits in the operands. If TEST UNDER MASK can be used,
2031 // return the corresponding CC mask, otherwise return 0.
2032 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2033  uint64_t Mask, uint64_t CmpVal,
2034  unsigned ICmpType) {
2035  assert(Mask != 0 && "ANDs with zero should have been removed by now");
2036 
2037  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2038  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2039  !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2040  return 0;
2041 
2042  // Work out the masks for the lowest and highest bits.
2043  unsigned HighShift = 63 - countLeadingZeros(Mask);
2044  uint64_t High = uint64_t(1) << HighShift;
2045  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2046 
2047  // Signed ordered comparisons are effectively unsigned if the sign
2048  // bit is dropped.
2049  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2050 
2051  // Check for equality comparisons with 0, or the equivalent.
2052  if (CmpVal == 0) {
2053  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2054  return SystemZ::CCMASK_TM_ALL_0;
2055  if (CCMask == SystemZ::CCMASK_CMP_NE)
2056  return SystemZ::CCMASK_TM_SOME_1;
2057  }
2058  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2059  if (CCMask == SystemZ::CCMASK_CMP_LT)
2060  return SystemZ::CCMASK_TM_ALL_0;
2061  if (CCMask == SystemZ::CCMASK_CMP_GE)
2062  return SystemZ::CCMASK_TM_SOME_1;
2063  }
2064  if (EffectivelyUnsigned && CmpVal < Low) {
2065  if (CCMask == SystemZ::CCMASK_CMP_LE)
2066  return SystemZ::CCMASK_TM_ALL_0;
2067  if (CCMask == SystemZ::CCMASK_CMP_GT)
2068  return SystemZ::CCMASK_TM_SOME_1;
2069  }
2070 
2071  // Check for equality comparisons with the mask, or the equivalent.
2072  if (CmpVal == Mask) {
2073  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2074  return SystemZ::CCMASK_TM_ALL_1;
2075  if (CCMask == SystemZ::CCMASK_CMP_NE)
2076  return SystemZ::CCMASK_TM_SOME_0;
2077  }
2078  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2079  if (CCMask == SystemZ::CCMASK_CMP_GT)
2080  return SystemZ::CCMASK_TM_ALL_1;
2081  if (CCMask == SystemZ::CCMASK_CMP_LE)
2082  return SystemZ::CCMASK_TM_SOME_0;
2083  }
2084  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2085  if (CCMask == SystemZ::CCMASK_CMP_GE)
2086  return SystemZ::CCMASK_TM_ALL_1;
2087  if (CCMask == SystemZ::CCMASK_CMP_LT)
2088  return SystemZ::CCMASK_TM_SOME_0;
2089  }
2090 
2091  // Check for ordered comparisons with the top bit.
2092  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2093  if (CCMask == SystemZ::CCMASK_CMP_LE)
2094  return SystemZ::CCMASK_TM_MSB_0;
2095  if (CCMask == SystemZ::CCMASK_CMP_GT)
2096  return SystemZ::CCMASK_TM_MSB_1;
2097  }
2098  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2099  if (CCMask == SystemZ::CCMASK_CMP_LT)
2100  return SystemZ::CCMASK_TM_MSB_0;
2101  if (CCMask == SystemZ::CCMASK_CMP_GE)
2102  return SystemZ::CCMASK_TM_MSB_1;
2103  }
2104 
2105  // If there are just two bits, we can do equality checks for Low and High
2106  // as well.
2107  if (Mask == Low + High) {
2108  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2109  return SystemZ::CCMASK_TM_MIXED_MSB_0;
2110  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2111  return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2112  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2113  return SystemZ::CCMASK_TM_MIXED_MSB_1;
2114  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2115  return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2116  }
2117 
2118  // Looks like we've exhausted our options.
2119  return 0;
2120 }
2121 
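// Added editorial worked example (not part of the original source): the
// Low/High computation above, modelled with portable bit tricks instead of
// countTrailingZeros/countLeadingZeros. For Mask = 0x00F0 the lowest set
// bit is 1 << 4 and the highest is 1 << 7, so e.g. the unsigned
// "CmpVal > 0 && CmpVal <= Low" range check covers exactly the values that
// distinguish "all masked bits zero" from "some masked bit set".
#include <cstdint>
constexpr uint64_t lowBit(uint64_t Mask) { return Mask & -Mask; }
constexpr uint64_t highBit(uint64_t Mask) {
  // Strip the lowest set bit until only one remains.
  return (Mask & (Mask - 1)) ? highBit(Mask & (Mask - 1)) : Mask;
}
static_assert(lowBit(0x00F0) == 0x10, "lowest set bit of 0x00F0");
static_assert(highBit(0x00F0) == 0x80, "highest set bit of 0x00F0");
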
2122 // See whether C can be implemented as a TEST UNDER MASK instruction.
2123 // Update the arguments with the TM version if so.
2124 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2125  Comparison &C) {
2126  // Check that we have a comparison with a constant.
2127  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2128  if (!ConstOp1)
2129  return;
2130  uint64_t CmpVal = ConstOp1->getZExtValue();
2131 
2132  // Check whether the nonconstant input is an AND with a constant mask.
2133  Comparison NewC(C);
2134  uint64_t MaskVal;
2135  ConstantSDNode *Mask = nullptr;
2136  if (C.Op0.getOpcode() == ISD::AND) {
2137  NewC.Op0 = C.Op0.getOperand(0);
2138  NewC.Op1 = C.Op0.getOperand(1);
2139  Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2140  if (!Mask)
2141  return;
2142  MaskVal = Mask->getZExtValue();
2143  } else {
2144  // There is no instruction to compare with a 64-bit immediate
2145  // so use TMHH instead if possible. We need an unsigned ordered
2146  // comparison with an i64 immediate.
2147  if (NewC.Op0.getValueType() != MVT::i64 ||
2148  NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2149  NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2150  NewC.ICmpType == SystemZICMP::SignedOnly)
2151  return;
2152  // Convert LE and GT comparisons into LT and GE.
2153  if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2154  NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2155  if (CmpVal == uint64_t(-1))
2156  return;
2157  CmpVal += 1;
2158  NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2159  }
2160  // If the low N bits of Op1 are zero, then the low N bits of Op0 can
2161  // be masked off without changing the result.
2162  MaskVal = -(CmpVal & -CmpVal);
2163  NewC.ICmpType = SystemZICMP::UnsignedOnly;
2164  }
2165  if (!MaskVal)
2166  return;
2167 
2168  // Check whether the combination of mask, comparison value and comparison
2169  // type are suitable.
2170  unsigned BitSize = NewC.Op0.getValueSizeInBits();
2171  unsigned NewCCMask, ShiftVal;
2172  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2173  NewC.Op0.getOpcode() == ISD::SHL &&
2174  isSimpleShift(NewC.Op0, ShiftVal) &&
2175  (MaskVal >> ShiftVal != 0) &&
2176  ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2177  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2178  MaskVal >> ShiftVal,
2179  CmpVal >> ShiftVal,
2180  SystemZICMP::Any))) {
2181  NewC.Op0 = NewC.Op0.getOperand(0);
2182  MaskVal >>= ShiftVal;
2183  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2184  NewC.Op0.getOpcode() == ISD::SRL &&
2185  isSimpleShift(NewC.Op0, ShiftVal) &&
2186  (MaskVal << ShiftVal != 0) &&
2187  ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2188  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2189  MaskVal << ShiftVal,
2190  CmpVal << ShiftVal,
2191  SystemZICMP::UnsignedOnly))) {
2192  NewC.Op0 = NewC.Op0.getOperand(0);
2193  MaskVal <<= ShiftVal;
2194  } else {
2195  NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2196  NewC.ICmpType);
2197  if (!NewCCMask)
2198  return;
2199  }
2200 
2201  // Go ahead and make the change.
2202  C.Opcode = SystemZISD::TM;
2203  C.Op0 = NewC.Op0;
2204  if (Mask && Mask->getZExtValue() == MaskVal)
2205  C.Op1 = SDValue(Mask, 0);
2206  else
2207  C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2208  C.CCValid = SystemZ::CCMASK_TM;
2209  C.CCMask = NewCCMask;
2210 }
2211 
2212 // See whether the comparison argument contains a redundant AND
2213 // and remove it if so. This sometimes happens due to the generic
2214 // BRCOND expansion.
2215 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2216  Comparison &C) {
2217  if (C.Op0.getOpcode() != ISD::AND)
2218  return;
2219  auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2220  if (!Mask)
2221  return;
2222  KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2223  if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2224  return;
2225 
2226  C.Op0 = C.Op0.getOperand(0);
2227 }
2228 
2229 // Return a Comparison that tests the condition-code result of intrinsic
2230 // node Call against constant integer CC using comparison code Cond.
2231 // Opcode is the opcode of the SystemZISD operation for the intrinsic
2232 // and CCValid is the set of possible condition-code results.
2233 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2234  SDValue Call, unsigned CCValid, uint64_t CC,
2235  ISD::CondCode Cond) {
2236  Comparison C(Call, SDValue());
2237  C.Opcode = Opcode;
2238  C.CCValid = CCValid;
2239  if (Cond == ISD::SETEQ)
2240  // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2241  C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2242  else if (Cond == ISD::SETNE)
2243  // ...and the inverse of that.
2244  C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2245  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2246  // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2247  // always true for CC>3.
2248  C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2249  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2250  // ...and the inverse of that.
2251  C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2252  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2253  // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2254  // always true for CC>3.
2255  C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2256  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2257  // ...and the inverse of that.
2258  C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2259  else
2260  llvm_unreachable("Unexpected integer comparison type");
2261  C.CCMask &= CCValid;
2262  return C;
2263 }
2264 
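// Added editorial worked example (not part of the original source): with
// the encoding noted above (bit 3 tests CC==0, ..., bit 0 tests CC==3),
// Cond == SETLT with CC == 2 yields ~0U << (4 - 2); masking with a CCValid
// of 0xF keeps bits 3 and 2, i.e. exactly the CC==0 and CC==1 cases.
static_assert(((~0u << (4 - 2)) & 0xFu) == 0xCu, "SETLT, CC==2 -> {CC0, CC1}");
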
2265  // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2266 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2267  ISD::CondCode Cond, const SDLoc &DL) {
2268  if (CmpOp1.getOpcode() == ISD::Constant) {
2269  uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2270  unsigned Opcode, CCValid;
2271  if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2272  CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2273  isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2274  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2275  if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2276  CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2277  isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2278  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2279  }
2280  Comparison C(CmpOp0, CmpOp1);
2281  C.CCMask = CCMaskForCondCode(Cond);
2282  if (C.Op0.getValueType().isFloatingPoint()) {
2283  C.CCValid = SystemZ::CCMASK_FCMP;
2284  C.Opcode = SystemZISD::FCMP;
2285  adjustForFNeg(C);
2286  } else {
2287  C.CCValid = SystemZ::CCMASK_ICMP;
2288  C.Opcode = SystemZISD::ICMP;
2289  // Choose the type of comparison. Equality and inequality tests can
2290  // use either signed or unsigned comparisons. The choice also doesn't
2291  // matter if both sign bits are known to be clear. In those cases we
2292  // want to give the main isel code the freedom to choose whichever
2293  // form fits best.
2294  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2295  C.CCMask == SystemZ::CCMASK_CMP_NE ||
2296  (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2297  C.ICmpType = SystemZICMP::Any;
2298  else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2299  C.ICmpType = SystemZICMP::UnsignedOnly;
2300  else
2301  C.ICmpType = SystemZICMP::SignedOnly;
2302  C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2303  adjustForRedundantAnd(DAG, DL, C);
2304  adjustZeroCmp(DAG, DL, C);
2305  adjustSubwordCmp(DAG, DL, C);
2306  adjustForSubtraction(DAG, DL, C);
2307  adjustForLTGFR(C);
2308  adjustICmpTruncate(DAG, DL, C);
2309  }
2310 
2311  if (shouldSwapCmpOperands(C)) {
2312  std::swap(C.Op0, C.Op1);
2313  C.CCMask = reverseCCMask(C.CCMask);
2314  }
2315 
2316  adjustForTestUnderMask(DAG, DL, C);
2317  return C;
2318 }
2319 
2320 // Emit the comparison instruction described by C.
2321 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2322  if (!C.Op1.getNode()) {
2323  SDNode *Node;
2324  switch (C.Op0.getOpcode()) {
2325  case ISD::INTRINSIC_W_CHAIN:
2326  Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2327  return SDValue(Node, 0);
2328  case ISD::INTRINSIC_WO_CHAIN:
2329  Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2330  return SDValue(Node, Node->getNumValues() - 1);
2331  default:
2332  llvm_unreachable("Invalid comparison operands");
2333  }
2334  }
2335  if (C.Opcode == SystemZISD::ICMP)
2336  return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2337  DAG.getConstant(C.ICmpType, DL, MVT::i32));
2338  if (C.Opcode == SystemZISD::TM) {
2339  bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2340  bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2341  return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2342  DAG.getConstant(RegisterOnly, DL, MVT::i32));
2343  }
2344  return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2345 }
2346 
2347 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
2348 // 64 bits. Extend is the extension type to use. Store the high part
2349 // in Hi and the low part in Lo.
2350 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2351  SDValue Op0, SDValue Op1, SDValue &Hi,
2352  SDValue &Lo) {
2353  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2354  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2355  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2356  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2357  DAG.getConstant(32, DL, MVT::i64));
2358  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2359  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2360 }
2361 
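// Added editorial worked example (not part of the original source): the
// same high/low split on plain integers. 0xFFFFFFFF * 2 = 0x1FFFFFFFE, so
// the high 32-bit half is 1 and the truncated low half is 0xFFFFFFFE.
static_assert((0xFFFFFFFFull * 2 >> 32) == 1, "high half via SRL 32");
static_assert((0xFFFFFFFFull * 2 & 0xFFFFFFFFull) == 0xFFFFFFFEull,
              "low half via truncation");
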
2362 // Lower a binary operation that produces two VT results, one in each
2363 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2364 // and Opcode performs the GR128 operation. Store the even register result
2365 // in Even and the odd register result in Odd.
2366 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2367  unsigned Opcode, SDValue Op0, SDValue Op1,
2368  SDValue &Even, SDValue &Odd) {
2369  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2370  bool Is32Bit = is32Bit(VT);
2371  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2372  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2373 }
2374 
2375 // Return an i32 value that is 1 if the CC value produced by CCReg is
2376 // in the mask CCMask and 0 otherwise. CC is known to have a value
2377 // in CCValid, so other values can be ignored.
2378 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2379  unsigned CCValid, unsigned CCMask) {
2380  SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32),
2381  DAG.getConstant(0, DL, MVT::i32),
2382  DAG.getConstant(CCValid, DL, MVT::i32),
2383  DAG.getConstant(CCMask, DL, MVT::i32), CCReg };
2384  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2385 }
2386 
2387  // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2388 // be done directly. IsFP is true if CC is for a floating-point rather than
2389 // integer comparison.
2390 static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
2391  switch (CC) {
2392  case ISD::SETOEQ:
2393  case ISD::SETEQ:
2394  return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
2395 
2396  case ISD::SETOGE:
2397  case ISD::SETGE:
2398  return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);
2399 
2400  case ISD::SETOGT:
2401  case ISD::SETGT:
2402  return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
2403 
2404  case ISD::SETUGT:
2405  return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;
2406 
2407  default:
2408  return 0;
2409  }
2410 }
2411 
2412 // Return the SystemZISD vector comparison operation for CC or its inverse,
2413 // or 0 if neither can be done directly. Indicate in Invert whether the
2414 // result is for the inverse of CC. IsFP is true if CC is for a
2415 // floating-point rather than integer comparison.
2416 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
2417  bool &Invert) {
2418  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
2419  Invert = false;
2420  return Opcode;
2421  }
2422 
2423  CC = ISD::getSetCCInverse(CC, !IsFP);
2424  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
2425  Invert = true;
2426  return Opcode;
2427  }
2428 
2429  return 0;
2430 }
2431 
2432 // Return a v2f64 that contains the extended form of elements Start and Start+1
2433 // of v4f32 value Op.
2434 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2435  SDValue Op) {
2436  int Mask[] = { Start, -1, Start + 1, -1 };
2437  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2438  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2439 }
2440 
2441 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2442 // producing a result of type VT.
2443 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2444  const SDLoc &DL, EVT VT,
2445  SDValue CmpOp0,
2446  SDValue CmpOp1) const {
2447  // There is no hardware support for v4f32 (unless we have the vector
2448  // enhancements facility 1), so extend the vector into two v2f64s
2449  // and compare those.
2450  if (CmpOp0.getValueType() == MVT::v4f32 &&
2451  !Subtarget.hasVectorEnhancements1()) {
2452  SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
2453  SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
2454  SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
2455  SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
2456  SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
2457  SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
2458  return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2459  }
2460  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
2461 }
2462 
2463 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
2464 // an integer mask of type VT.
2465 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
2466  const SDLoc &DL, EVT VT,
2467  ISD::CondCode CC,
2468  SDValue CmpOp0,
2469  SDValue CmpOp1) const {
2470  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
2471  bool Invert = false;
2472  SDValue Cmp;
2473  switch (CC) {
2474  // Handle tests for order using (or (ogt y x) (oge x y)).
2475  case ISD::SETUO:
2476  Invert = true;
2477  LLVM_FALLTHROUGH;
2478  case ISD::SETO: {
2479  assert(IsFP && "Unexpected integer comparison");
2480  SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
2481  SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
2482  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
2483  break;
2484  }
2485 
2486  // Handle <> tests using (or (ogt y x) (ogt x y)).
2487  case ISD::SETUEQ:
2488  Invert = true;
2489  LLVM_FALLTHROUGH;
2490  case ISD::SETONE: {
2491  assert(IsFP && "Unexpected integer comparison");
2492  SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
2493  SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
2494  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
2495  break;
2496  }
2497 
2498  // Otherwise a single comparison is enough. It doesn't really
2499  // matter whether we try the inversion or the swap first, since
2500  // there are no cases where both work.
2501  default:
2502  if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
2503  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
2504  else {
2505  CC = ISD::getSetCCSwappedOperands(CC);
2506  if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
2507  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
2508  else
2509  llvm_unreachable("Unhandled comparison");
2510  }
2511  break;
2512  }
2513  if (Invert) {
2514  SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
2515  DAG.getConstant(65535, DL, MVT::i32));
2516  Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask);
2517  Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
2518  }
2519  return Cmp;
2520 }
2521 
2522 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
2523  SelectionDAG &DAG) const {
2524  SDValue CmpOp0 = Op.getOperand(0);
2525  SDValue CmpOp1 = Op.getOperand(1);
2526  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2527  SDLoc DL(Op);
2528  EVT VT = Op.getValueType();
2529  if (VT.isVector())
2530  return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
2531 
2532  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2533  SDValue CCReg = emitCmp(DAG, DL, C);
2534  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
2535 }
2536 
2537 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2538  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2539  SDValue CmpOp0 = Op.getOperand(2);
2540  SDValue CmpOp1 = Op.getOperand(3);
2541  SDValue Dest = Op.getOperand(4);
2542  SDLoc DL(Op);
2543 
2544  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2545  SDValue CCReg = emitCmp(DAG, DL, C);
2546  return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
2547  Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
2548  DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
2549 }
2550 
2551 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
2552 // allowing Pos and Neg to be wider than CmpOp.
2553 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
2554  return (Neg.getOpcode() == ISD::SUB &&
2555  Neg.getOperand(0).getOpcode() == ISD::Constant &&
2556  cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
2557  Neg.getOperand(1) == Pos &&
2558  (Pos == CmpOp ||
2559  (Pos.getOpcode() == ISD::SIGN_EXTEND &&
2560  Pos.getOperand(0) == CmpOp)));
2561 }
2562 
2563 // Return the absolute or negative absolute of Op; IsNegative decides which.
2564 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
2565  bool IsNegative) {
2566  Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
2567  if (IsNegative)
2568  Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
2569  DAG.getConstant(0, DL, Op.getValueType()), Op);
2570  return Op;
2571 }
2572 
2573 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
2574  SelectionDAG &DAG) const {
2575  SDValue CmpOp0 = Op.getOperand(0);
2576  SDValue CmpOp1 = Op.getOperand(1);
2577  SDValue TrueOp = Op.getOperand(2);
2578  SDValue FalseOp = Op.getOperand(3);
2579  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2580  SDLoc DL(Op);
2581 
2582  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2583 
2584  // Check for absolute and negative-absolute selections, including those
2585  // where the comparison value is sign-extended (for LPGFR and LNGFR).
2586  // This check supplements the one in DAGCombiner.
2587  if (C.Opcode == SystemZISD::ICMP &&
2588  C.CCMask != SystemZ::CCMASK_CMP_EQ &&
2589  C.CCMask != SystemZ::CCMASK_CMP_NE &&
2590  C.Op1.getOpcode() == ISD::Constant &&
2591  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2592  if (isAbsolute(C.Op0, TrueOp, FalseOp))
2593  return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
2594  if (isAbsolute(C.Op0, FalseOp, TrueOp))
2595  return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
2596  }
2597 
2598  SDValue CCReg = emitCmp(DAG, DL, C);
2599  SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
2600  DAG.getConstant(C.CCMask, DL, MVT::i32), CCReg};
2601 
2602  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
2603 }
2604 
2605 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
2606  SelectionDAG &DAG) const {
2607  SDLoc DL(Node);
2608  const GlobalValue *GV = Node->getGlobal();
2609  int64_t Offset = Node->getOffset();
2610  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2611  CodeModel::Model CM = DAG.getTarget().getCodeModel();
2612 
2613  SDValue Result;
2614  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
2615  // Assign anchors at 1<<12 byte boundaries.
2616  uint64_t Anchor = Offset & ~uint64_t(0xfff);
2617  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
2618  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2619 
2620  // The offset can be folded into the address if it is aligned to a halfword.
2621  Offset -= Anchor;
2622  if (Offset != 0 && (Offset & 1) == 0) {
2623  SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
2624  Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
2625  Offset = 0;
2626  }
2627  } else {
2628  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
2629  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2630  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
2631  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2632  }
2633 
2634  // If there was a non-zero offset that we didn't fold, create an explicit
2635  // addition for it.
2636  if (Offset != 0)
2637  Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
2638  DAG.getConstant(Offset, DL, PtrVT));
2639 
2640  return Result;
2641 }
2642 
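// Added editorial worked example (not part of the original source): for a
// hypothetical Offset = 0x1233, the anchor is 0x1233 & ~0xfff = 0x1000 and
// the residual is 0x233. The residual is odd, so it cannot be folded into
// the halfword-aligned address and is added explicitly; an even residual
// such as 0x232 would be folded.
static_assert((0x1233 & ~0xfffull) == 0x1000, "anchor at a 4KB boundary");
static_assert(((0x1233 - 0x1000) & 1) == 1, "odd residual is not foldable");
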
2643 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
2644  SelectionDAG &DAG,
2645  unsigned Opcode,
2646  SDValue GOTOffset) const {
2647  SDLoc DL(Node);
2648  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2649  SDValue Chain = DAG.getEntryNode();
2650  SDValue Glue;
2651 
2652  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
2653  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
2654  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
2655  Glue = Chain.getValue(1);
2656  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
2657  Glue = Chain.getValue(1);
2658 
2659  // The first call operand is the chain and the second is the TLS symbol.
2660  SmallVector<SDValue, 8> Ops;
2661  Ops.push_back(Chain);
2662  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
2663  Node->getValueType(0),
2664  0, 0));
2665 
2666  // Add argument registers to the end of the list so that they are
2667  // known live into the call.
2668  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
2669  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
2670 
2671  // Add a register mask operand representing the call-preserved registers.
2672  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2673  const uint32_t *Mask =
2674  TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
2675  assert(Mask && "Missing call preserved mask for calling convention");
2676  Ops.push_back(DAG.getRegisterMask(Mask));
2677 
2678  // Glue the call to the argument copies.
2679  Ops.push_back(Glue);
2680 
2681  // Emit the call.
2682  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2683  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
2684  Glue = Chain.getValue(1);
2685 
2686  // Copy the return value from %r2.
2687  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
2688 }
2689 
2690 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
2691  SelectionDAG &DAG) const {
2692  SDValue Chain = DAG.getEntryNode();
2693  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2694 
2695  // The high part of the thread pointer is in access register 0.
2696  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
2697  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
2698 
2699  // The low part of the thread pointer is in access register 1.
2700  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
2701  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
2702 
2703  // Merge them into a single 64-bit address.
2704  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
2705  DAG.getConstant(32, DL, PtrVT));
2706  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
2707 }
2708 
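// Added editorial worked example (not part of the original source): merging
// the two access registers. For hypothetical AR0 = 0x00000001 and
// AR1 = 0x80000000, the thread pointer is (AR0 << 32) | zext(AR1). The
// ZERO_EXTEND of the low half matters: a sign extension would smear ones
// into the high half and corrupt AR0's contribution.
static_assert(((0x00000001ull << 32) | 0x80000000u) == 0x0000000180000000ull,
              "TP = (hi << 32) | zext(lo)");
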
2709 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
2710  SelectionDAG &DAG) const {
2711  if (DAG.getTarget().useEmulatedTLS())
2712  return LowerToTLSEmulatedModel(Node, DAG);
2713  SDLoc DL(Node);
2714  const GlobalValue *GV = Node->getGlobal();
2715  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2716  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
2717 
2718  SDValue TP = lowerThreadPointer(DL, DAG);
2719 
2720  // Get the offset of GA from the thread pointer, based on the TLS model.
2721  SDValue Offset;
2722  switch (model) {
2723  case TLSModel::GeneralDynamic: {
2724  // Load the GOT offset of the tls_index (module ID / per-symbol offset).
2725  SystemZConstantPoolValue *CPV =
2726  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
2727 
2728  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
2729  Offset = DAG.getLoad(
2730  PtrVT, DL, DAG.getEntryNode(), Offset,
2731  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2732 
2733  // Call __tls_get_offset to retrieve the offset.
2734  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
2735  break;
2736  }
2737 
2738  case TLSModel::LocalDynamic: {
2739  // Load the GOT offset of the module ID.
2740  SystemZConstantPoolValue *CPV =
2741  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
2742 
2743  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
2744  Offset = DAG.getLoad(
2745  PtrVT, DL, DAG.getEntryNode(), Offset,
2746  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2747 
2748  // Call __tls_get_offset to retrieve the module base offset.
2749  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
2750 
2751  // Note: The SystemZLDCleanupPass will remove redundant computations
2752  // of the module base offset. Count total number of local-dynamic
2753  // accesses to trigger execution of that pass.
2754  SystemZMachineFunctionInfo* MFI =
2755  DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
2756  MFI->incNumLocalDynamicTLSAccesses();
2757 
2758  // Add the per-symbol offset.
2759  CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
2760 
2761  SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
2762  DTPOffset = DAG.getLoad(
2763  PtrVT, DL, DAG.getEntryNode(), DTPOffset,
2765 
2766  Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
2767  break;
2768  }
2769 
2770  case TLSModel::InitialExec: {
2771  // Load the offset from the GOT.
2772  Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
2773  SystemZII::MO_INDNTPOFF);
2774  Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
2775  Offset =
2776  DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
2777  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2778  break;
2779  }
2780 
2781  case TLSModel::LocalExec: {
2782  // Force the offset into the constant pool and load it from there.
2783  SystemZConstantPoolValue *CPV =
2784  SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
2785 
2786  Offset = DAG.getConstantPool(CPV, PtrVT, 8);
2787  Offset = DAG.getLoad(
2788  PtrVT, DL, DAG.getEntryNode(), Offset,
2789  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2790  break;
2791  }
2792  }
2793 
2794  // Add the base and offset together.
2795  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
2796 }
2797 
2798 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
2799  SelectionDAG &DAG) const {
2800  SDLoc DL(Node);
2801  const BlockAddress *BA = Node->getBlockAddress();
2802  int64_t Offset = Node->getOffset();
2803  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2804 
2805  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
2806  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2807  return Result;
2808 }
2809 
2810 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
2811  SelectionDAG &DAG) const {
2812  SDLoc DL(JT);
2813  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2814  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2815 
2816  // Use LARL to load the address of the table.
2817  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2818 }
2819 
2820 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
2821  SelectionDAG &DAG) const {
2822  SDLoc DL(CP);
2823  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2824 
2825  SDValue Result;
2826  if (CP->isMachineConstantPoolEntry())
2827  Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2828  CP->getAlignment());
2829  else
2830  Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2831  CP->getAlignment(), CP->getOffset());
2832 
2833  // Use LARL to load the address of the constant pool entry.
2834  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2835 }
2836 
2837 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
2838  SelectionDAG &DAG) const {
2839  MachineFunction &MF = DAG.getMachineFunction();
2840  MachineFrameInfo &MFI = MF.getFrameInfo();
2841  MFI.setFrameAddressIsTaken(true);
2842 
2843  SDLoc DL(Op);
2844  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2845  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2846 
2847  // If the back chain frame index has not been allocated yet, do so.
2848  SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
2849  int BackChainIdx = FI->getFramePointerSaveIndex();
2850  if (!BackChainIdx) {
2851  // By definition, the frame address is the address of the back chain.
2852  BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
2853  FI->setFramePointerSaveIndex(BackChainIdx);
2854  }
2855  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
2856 
2857  // FIXME The frontend should detect this case.
2858  if (Depth > 0) {
2859  report_fatal_error("Unsupported stack frame traversal count");
2860  }
2861 
2862  return BackChain;
2863 }
2864 
2865 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
2866  SelectionDAG &DAG) const {
2867  MachineFunction &MF = DAG.getMachineFunction();
2868  MachineFrameInfo &MFI = MF.getFrameInfo();
2869  MFI.setReturnAddressIsTaken(true);
2870 
2871  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
2872  return SDValue();
2873 
2874  SDLoc DL(Op);
2875  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2876  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2877 
2878  // FIXME The frontend should detect this case.
2879  if (Depth > 0) {
2880  report_fatal_error("Unsupported stack frame traversal count");
2881  }
2882 
2883  // Return R14D, which has the return address. Mark it an implicit live-in.
2884  unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
2885  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
2886 }
2887 
2888 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
2889  SelectionDAG &DAG) const {
2890  SDLoc DL(Op);
2891  SDValue In = Op.getOperand(0);
2892  EVT InVT = In.getValueType();
2893  EVT ResVT = Op.getValueType();
2894 
2895  // Convert loads directly. This is normally done by DAGCombiner,
2896  // but we need this case for bitcasts that are created during lowering
2897  // and which are then lowered themselves.
2898  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
2899  if (ISD::isNormalLoad(LoadN)) {
2900  SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
2901  LoadN->getBasePtr(), LoadN->getMemOperand());
2902  // Update the chain uses.
2903  DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
2904  return NewLoad;
2905  }
2906 
2907  if (InVT == MVT::i32 && ResVT == MVT::f32) {
2908  SDValue In64;
2909  if (Subtarget.hasHighWord()) {
2910  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
2911  MVT::i64);
2912  In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
2913  MVT::i64, SDValue(U64, 0), In);
2914  } else {
2915  In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
2916  In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
2917  DAG.getConstant(32, DL, MVT::i64));
2918  }
2919  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
2920  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
2921  DL, MVT::f32, Out64);
2922  }
2923  if (InVT == MVT::f32 && ResVT == MVT::i32) {
2924  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
2925  SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
2926  MVT::f64, SDValue(U64, 0), In);
2927  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
2928  if (Subtarget.hasHighWord())
2929  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
2930  MVT::i32, Out64);
2931  SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
2932  DAG.getConstant(32, DL, MVT::i64));
2933  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
2934  }
2935  llvm_unreachable("Unexpected bitcast combination");
2936 }
2937 
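// Added editorial worked example (not part of the original source): without
// the high-word facility, the i32 is shifted into the high half of an i64
// before the f64 bitcast, so bit 31 of the original value (the f32 sign
// bit) lands in bit 63 of the i64.
static_assert((0x80000000ull << 32) == 0x8000000000000000ull,
              "i32 sign bit lands in i64 bit 63");
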
2938 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
2939  SelectionDAG &DAG) const {
2940  MachineFunction &MF = DAG.getMachineFunction();
2941  SystemZMachineFunctionInfo *FuncInfo =
2942  MF.getInfo<SystemZMachineFunctionInfo>();
2943  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2944 
2945  SDValue Chain = Op.getOperand(0);
2946  SDValue Addr = Op.getOperand(1);
2947  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2948  SDLoc DL(Op);
2949 
2950  // The initial values of each field.
2951  const unsigned NumFields = 4;
2952  SDValue Fields[NumFields] = {
2953  DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
2954  DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
2955  DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
2956  DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
2957  };
2958 
2959  // Store each field into its respective slot.
2960  SDValue MemOps[NumFields];
2961  unsigned Offset = 0;
2962  for (unsigned I = 0; I < NumFields; ++I) {
2963  SDValue FieldAddr = Addr;
2964  if (Offset != 0)
2965  FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
2966  DAG.getIntPtrConstant(Offset, DL));
2967  MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
2968  MachinePointerInfo(SV, Offset));
2969  Offset += 8;
2970  }
2971  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
2972 }
2973 
2974 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
2975  SelectionDAG &DAG) const {
2976  SDValue Chain = Op.getOperand(0);
2977  SDValue DstPtr = Op.getOperand(1);
2978  SDValue SrcPtr = Op.getOperand(2);
2979  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
2980  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
2981  SDLoc DL(Op);
2982 
2983  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
2984  /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
2985  /*isTailCall*/false,
2986  MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
2987 }
2988 
2989 SDValue SystemZTargetLowering::
2990 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
2991  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
2992  MachineFunction &MF = DAG.getMachineFunction();
2993  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
2994  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
2995 
2996  SDValue Chain = Op.getOperand(0);
2997  SDValue Size = Op.getOperand(1);
2998  SDValue Align = Op.getOperand(2);
2999  SDLoc DL(Op);
3000 
3001  // If the user has set the "no-realign-stack" function attribute, ignore
3002  // alloca alignments.
3003  uint64_t AlignVal = (RealignOpt ?
3004  dyn_cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3005 
3006  uint64_t StackAlign = TFI->getStackAlignment();
3007  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3008  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3009 
3010  unsigned SPReg = getStackPointerRegisterToSaveRestore();
3011  SDValue NeededSpace = Size;
3012 
3013  // Get a reference to the stack pointer.
3014  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3015 
3016  // If we need a backchain, save it now.
3017  SDValue Backchain;
3018  if (StoreBackchain)
3019  Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
3020 
3021  // Add extra space for alignment if needed.
3022  if (ExtraAlignSpace)
3023  NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3024  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3025 
3026  // Get the new stack pointer value.
3027  SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3028 
3029  // Copy the new stack pointer back.
3030  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3031 
3032  // The allocated data lives above the 160 bytes allocated for the standard
3033  // frame, plus any outgoing stack arguments. We don't know how much that
3034  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3035  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3036  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3037 
3038  // Dynamically realign if needed.
3039  if (RequiredAlign > StackAlign) {
3040  Result =
3041  DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3042  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3043  Result =
3044  DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3045  DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3046  }
3047 
3048  if (StoreBackchain)
3049  Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
3050 
3051  SDValue Ops[2] = { Result, Chain };
3052  return DAG.getMergeValues(Ops, DL);
3053 }
3054 
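// Added editorial worked example (not part of the original source): the
// realignment arithmetic above with a hypothetical 8-byte stack alignment
// and a 64-byte alloca alignment. ExtraAlignSpace = 64 - 8 = 56 bytes of
// slack are over-allocated, so adding the slack and then masking yields a
// 64-byte-aligned block that still lies inside the allocation.
static_assert(((0x1008ull + 56) & ~63ull) == 0x1040, "realigned result");
static_assert((0x1040 & 63) == 0, "result is 64-byte aligned");
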
3055 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3056  SDValue Op, SelectionDAG &DAG) const {
3057  SDLoc DL(Op);
3058 
3059  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3060 }
3061 
3062 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3063  SelectionDAG &DAG) const {
3064  EVT VT = Op.getValueType();
3065  SDLoc DL(Op);
3066  SDValue Ops[2];
3067  if (is32Bit(VT))
3068  // Just do a normal 64-bit multiplication and extract the results.
3069  // We define this so that it can be used for constant division.
3070  lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3071  Op.getOperand(1), Ops[1], Ops[0]);
3072  else if (Subtarget.hasMiscellaneousExtensions2())
3073  // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3074  // the high result in the even register. ISD::SMUL_LOHI is defined to
3075  // return the low half first, so the results are in reverse order.
3076  lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
3077  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3078  else {
3079  // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3080  //
3081  // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3082  //
3083  // but using the fact that the upper halves are either all zeros
3084  // or all ones:
3085  //
3086  // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3087  //
3088  // and grouping the right terms together since they are quicker than the
3089  // multiplication:
3090  //
3091  // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
3092  SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3093  SDValue LL = Op.getOperand(0);
3094  SDValue RL = Op.getOperand(1);
3095  SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3096  SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3097  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3098  // the high result in the even register. ISD::SMUL_LOHI is defined to
3099  // return the low half first, so the results are in reverse order.
3100  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3101  LL, RL, Ops[1], Ops[0]);
3102  SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3103  SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3104  SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3105  Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3106  }
3107  return DAG.getMergeValues(Ops, DL);
3108 }
3109 
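// Added editorial check (not part of the original source): the identity
// above, scaled down to an 8x8->16-bit multiply so plain integers can
// verify it. For ll = 0xFF (i.e. -1) and rl = 2: lh = 0xFF, rh = 0, and
// (ll * rl) - (((lh & rl) + (ll & rh)) << 8) = 0x1FE - 0x200 = -2 = 0xFFFE.
static_assert(((0xFFu * 2u - (((0xFFu & 2u) + (0xFFu & 0u)) << 8)) & 0xFFFFu)
                  == 0xFFFEu,
              "signed product recovered from the unsigned product");
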
3110 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3111  SelectionDAG &DAG) const {
3112  EVT VT = Op.getValueType();
3113  SDLoc DL(Op);
3114  SDValue Ops[2];
3115  if (is32Bit(VT))
3116  // Just do a normal 64-bit multiplication and extract the results.
3117  // We define this so that it can be used for constant division.
3118  lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3119  Op.getOperand(1), Ops[1], Ops[0]);
3120  else
3121  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3122  // the high result in the even register. ISD::UMUL_LOHI is defined to
3123  // return the low half first, so the results are in reverse order.
3124  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3125  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3126  return DAG.getMergeValues(Ops, DL);
3127 }
3128 
3129 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3130  SelectionDAG &DAG) const {
3131  SDValue Op0 = Op.getOperand(0);
3132  SDValue Op1 = Op.getOperand(1);
3133  EVT VT = Op.getValueType();
3134  SDLoc DL(Op);
3135 
3136  // We use DSGF for 32-bit division. This means the first operand must
3137  // always be 64-bit, and the second operand should be 32-bit whenever
3138  // that is possible, to improve performance.
3139  if (is32Bit(VT))
3140  Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3141  else if (DAG.ComputeNumSignBits(Op1) > 32)
3142  Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3143 
3144  // DSG(F) returns the remainder in the even register and the
3145  // quotient in the odd register.
3146  SDValue Ops[2];
3147  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3148  return DAG.getMergeValues(Ops, DL);
3149 }
3150 
3151 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3152  SelectionDAG &DAG) const {
3153  EVT VT = Op.getValueType();
3154  SDLoc DL(Op);
3155 
3156  // DL(G) returns the remainder in the even register and the
3157  // quotient in the odd register.
3158  SDValue Ops[2];
3159  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3160  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3161  return DAG.getMergeValues(Ops, DL);
3162 }
3163 
3164 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3165  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3166 
3167  // Get the known-zero masks for each operand.
3168  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
3169  KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
3170  DAG.computeKnownBits(Ops[1])};
3171 
3172  // See if the upper 32 bits of one operand and the lower 32 bits of the
3173  // other are known zero. They are the low and high operands respectively.
3174  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3175  Known[1].Zero.getZExtValue() };
3176  unsigned High, Low;
3177  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3178  High = 1, Low = 0;
3179  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3180  High = 0, Low = 1;
3181  else
3182  return Op;
3183 
3184  SDValue LowOp = Ops[Low];
3185  SDValue HighOp = Ops[High];
3186 
3187  // If the high part is a constant, we're better off using IILH.
3188  if (HighOp.getOpcode() == ISD::Constant)
3189  return Op;
3190 
3191  // If the low part is a constant that is outside the range of LHI,
3192  // then we're better off using IILF.
3193  if (LowOp.getOpcode() == ISD::Constant) {
3194  int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3195  if (!isInt<16>(Value))
3196  return Op;
3197  }
3198 
3199  // Check whether the high part is an AND that doesn't change the
3200  // high 32 bits and just masks out low bits. We can skip it if so.
3201  if (HighOp.getOpcode() == ISD::AND &&
3202  HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3203  SDValue HighOp0 = HighOp.getOperand(0);
3204  uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3205  if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3206  HighOp = HighOp0;
3207  }
3208 
3209  // Take advantage of the fact that all GR32 operations only change the
3210  // low 32 bits by truncating Low to an i32 and inserting it directly
3211  // using a subreg. The interesting cases are those where the truncation
3212  // can be folded.
3213  SDLoc DL(Op);
3214  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3215  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3216  MVT::i64, HighOp, Low32);
3217 }
3218 
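// Added editorial worked example (not part of the original source): the
// known-zero test above in action. When the high 32 bits of one operand
// and the low 32 bits of the other are known zero, the OR is just a
// subreg insertion of the truncated low operand into the high operand.
static_assert((0x1234567800000000ull | 0x9ABCDEF0u) == 0x123456789ABCDEF0ull,
              "OR degenerates to inserting the low word");
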
3219 // Lower SADDO/SSUBO/UADDO/USUBO nodes.
3220 SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3221  SelectionDAG &DAG) const {
3222  SDNode *N = Op.getNode();
3223  SDValue LHS = N->getOperand(0);
3224  SDValue RHS = N->getOperand(1);
3225  SDLoc DL(N);
3226  unsigned BaseOp = 0;
3227  unsigned CCValid = 0;
3228  unsigned CCMask = 0;
3229 
3230  switch (Op.getOpcode()) {
3231  default: llvm_unreachable("Unknown instruction!");
3232  case ISD::SADDO:
3233  BaseOp = SystemZISD::SADDO;
3234  CCValid = SystemZ::CCMASK_ARITH;
3235  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3236  break;
3237  case ISD::SSUBO:
3238  BaseOp = SystemZISD::SSUBO;
3239  CCValid = SystemZ::CCMASK_ARITH;
3240  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3241  break;
3242  case ISD::UADDO:
3243  BaseOp = SystemZISD::UADDO;
3244  CCValid = SystemZ::CCMASK_LOGICAL;
3245  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3246  break;
3247  case ISD::USUBO:
3248  BaseOp = SystemZISD::USUBO;
3249  CCValid = SystemZ::CCMASK_LOGICAL;
3250  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3251  break;
3252  }
3253 
3254  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3255  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3256 
3257  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3258  if (N->getValueType(1) == MVT::i1)
3259  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3260 
3261  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3262 }
3263 
3264 // Lower ADDCARRY/SUBCARRY nodes.
3265 SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
3266  SelectionDAG &DAG) const {
3267 
3268  SDNode *N = Op.getNode();
3269  MVT VT = N->getSimpleValueType(0);
3270 
3271  // Let legalize expand this if it isn't a legal type yet.
3272  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3273  return SDValue();
3274 
3275  SDValue LHS = N->getOperand(0);
3276  SDValue RHS = N->getOperand(1);
3277  SDValue Carry = Op.getOperand(2);
3278  SDLoc DL(N);
3279  unsigned BaseOp = 0;
3280  unsigned CCValid = 0;
3281  unsigned CCMask = 0;
3282 
3283  switch (Op.getOpcode()) {
3284  default: llvm_unreachable("Unknown instruction!");
3285  case ISD::ADDCARRY:
3286  BaseOp = SystemZISD::ADDCARRY;
3287  CCValid = SystemZ::CCMASK_LOGICAL;
3288  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3289  break;
3290  case ISD::SUBCARRY:
3291  BaseOp = SystemZISD::SUBCARRY;
3292  CCValid = SystemZ::CCMASK_LOGICAL;
3293  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3294  break;
3295  }
3296 
3297  // Set the condition code from the carry flag.
3298  Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
3299  DAG.getConstant(CCValid, DL, MVT::i32),
3300  DAG.getConstant(CCMask, DL, MVT::i32));
3301 
3302  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3303  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
3304 
3305  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3306  if (N->getValueType(1) == MVT::i1)
3307  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3308 
3309  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3310 }
3311 
3312 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
3313  SelectionDAG &DAG) const {
3314  EVT VT = Op.getValueType();
3315  SDLoc DL(Op);
3316  Op = Op.getOperand(0);
3317 
3318  // Handle vector types via VPOPCT.
3319  if (VT.isVector()) {
3320  Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
3321  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
3322  switch (VT.getScalarSizeInBits()) {
3323  case 8:
3324  break;
3325  case 16: {
3326  Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
3327  SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
3328  SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
3329  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3330  Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
3331  break;
3332  }
3333  case 32: {
3334  SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
3335  DAG.getConstant(0, DL, MVT::i32));
3336  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3337  break;
3338  }
3339  case 64: {
3340  SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
3341  DAG.getConstant(0, DL, MVT::i32));
3342  Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
3343  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3344  break;
3345  }
3346  default:
3347  llvm_unreachable("Unexpected type");
3348  }
3349  return Op;
3350  }
3351 
3352  // Get the known-zero mask for the operand.
3353  KnownBits Known = DAG.computeKnownBits(Op);
3354  unsigned NumSignificantBits = (~Known.Zero).getActiveBits();
3355  if (NumSignificantBits == 0)
3356  return DAG.getConstant(0, DL, VT);
3357 
3358  // Skip known-zero high parts of the operand.
3359  int64_t OrigBitSize = VT.getSizeInBits();
3360  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
3361  BitSize = std::min(BitSize, OrigBitSize);
3362 
3363  // The POPCNT instruction counts the number of bits in each byte.
3364  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
3365  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
3366  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
3367 
3368  // Add up per-byte counts in a binary tree. All bits of Op at
3369  // position larger than BitSize remain zero throughout.
3370  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
3371  SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
3372  if (BitSize != OrigBitSize)
3373  Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
3374  DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
3375  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3376  }
3377 
3378  // Extract overall result from high byte.
3379  if (BitSize > 8)
3380  Op = DAG.getNode(ISD::SRL, DL, VT, Op,
3381  DAG.getConstant(BitSize - 8, DL, VT));
3382 
3383  return Op;
3384 }
3385 
3386 SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3387  SelectionDAG &DAG) const {
3388  SDLoc DL(Op);
3389  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
3390  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
3391  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
3392  cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
3393 
3394  // The only fence that needs an instruction is a sequentially-consistent
3395  // cross-thread fence.
3396  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
3397  FenceSSID == SyncScope::System) {
3398  return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
3399  Op.getOperand(0)),
3400  0);
3401  }
3402 
3403  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3404  return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3405 }
3406 
3407 // Op is an atomic load. Lower it into a normal volatile load.
3408 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
3409  SelectionDAG &DAG) const {
3410  auto *Node = cast<AtomicSDNode>(Op.getNode());
3411  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
3412  Node->getChain(), Node->getBasePtr(),
3413  Node->getMemoryVT(), Node->getMemOperand());
3414 }
3415 
3416 // Op is an atomic store. Lower it into a normal volatile store.
3417 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
3418  SelectionDAG &DAG) const {
3419  auto *Node = cast<AtomicSDNode>(Op.getNode());
3420  SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
3421  Node->getBasePtr(), Node->getMemoryVT(),
3422  Node->getMemOperand());
3423  // We have to enforce sequential consistency by performing a
3424  // serialization operation after the store.
3425  if (Node->getOrdering() == AtomicOrdering::SequentiallyConsistent)
3426  Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
3427  MVT::Other, Chain), 0);
3428  return Chain;
3429 }
3430 
3431 // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the 8-
3432 // and 16-bit forms into the fullword ATOMIC_LOADW_* operation given by Opcode.
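// For example, a 16-bit field at big-endian address A occupies byte
// offset A & 3 of the aligned word at A & ~3. Rotating that word left
// by (A & 3) * 8 bits (BitShift, effectively taken modulo 32) brings
// the field to the top of a GR32, and NegBitShift rotates it back.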
3433 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
3434  SelectionDAG &DAG,
3435  unsigned Opcode) const {
3436  auto *Node = cast<AtomicSDNode>(Op.getNode());
3437 
3438  // 32-bit operations need no code outside the main loop.
3439  EVT NarrowVT = Node->getMemoryVT();
3440  EVT WideVT = MVT::i32;
3441  if (NarrowVT == WideVT)
3442  return Op;
3443 
3444  int64_t BitSize = NarrowVT.getSizeInBits();
3445  SDValue ChainIn = Node->getChain();
3446  SDValue Addr = Node->getBasePtr();
3447  SDValue Src2 = Node->getVal();
3448  MachineMemOperand *MMO = Node->getMemOperand();
3449  SDLoc DL(Node);
3450  EVT PtrVT = Addr.getValueType();
3451 
3452  // Convert atomic subtracts of constants into additions.
3453  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
3454  if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
3455  Opcode = SystemZISD::ATOMIC_LOADW_ADD;
3456  Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
3457  }
3458 
3459  // Get the address of the containing word.
3460  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3461  DAG.getConstant(-4, DL, PtrVT));
3462 
3463  // Get the number of bits that the word must be rotated left in order
3464  // to bring the field to the top bits of a GR32.
3465  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3466  DAG.getConstant(3, DL, PtrVT));
3467  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3468 
3469  // Get the complementing shift amount, for rotating a field in the top
3470  // bits back to its proper position.
3471  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3472  DAG.getConstant(0, DL, WideVT), BitShift);
3473 
3474  // Extend the source operand to 32 bits and prepare it for the inner loop.
3475  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
3476  // operations require the source to be shifted in advance. (This shift
3477  // can be folded if the source is constant.) For AND and NAND, the lower
3478  // bits must be set, while for other opcodes they should be left clear.
3479  if (Opcode != SystemZISD::ATOMIC_SWAPW)
3480  Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
3481  DAG.getConstant(32 - BitSize, DL, WideVT));
3482  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
3483  Opcode == SystemZISD::ATOMIC_LOADW_NAND)
3484  Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
3485  DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
3486 
3487  // Construct the ATOMIC_LOADW_* node.
3488  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
3489  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
3490  DAG.getConstant(BitSize, DL, WideVT) };
3491  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
3492  NarrowVT, MMO);
3493 
3494  // Rotate the result of the final CS so that the field is in the lower
3495  // bits of a GR32, then truncate it.
3496  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
3497  DAG.getConstant(BitSize, DL, WideVT));
3498  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
3499 
3500  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
3501  return DAG.getMergeValues(RetOps, DL);
3502 }
3503 
3504 // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
3505 // into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
3506 // operations into additions.
3507 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
3508  SelectionDAG &DAG) const {
3509  auto *Node = cast<AtomicSDNode>(Op.getNode());
3510  EVT MemVT = Node->getMemoryVT();
3511  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
3512  // A full-width operation.
3513  assert(Op.getValueType() == MemVT && "Mismatched VTs");
3514  SDValue Src2 = Node->getVal();
3515  SDValue NegSrc2;
3516  SDLoc DL(Src2);
3517 
3518  if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
3519  // Use an addition if the operand is constant and either LAA(G) is
3520  // available or the negative value is in the range of A(G)FHI.
3521  int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
3522  if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
3523  NegSrc2 = DAG.getConstant(Value, DL, MemVT);
3524  } else if (Subtarget.hasInterlockedAccess1())
3525  // Use LAA(G) if available.
3526  NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
3527  Src2);
3528 
3529  if (NegSrc2.getNode())
3530  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
3531  Node->getChain(), Node->getBasePtr(), NegSrc2,
3532  Node->getMemOperand());
3533 
3534  // Use the node as-is.
3535  return Op;
3536  }
3537 
3538  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
3539 }
3540 
3541 // Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
3542 SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
3543  SelectionDAG &DAG) const {
3544  auto *Node = cast<AtomicSDNode>(Op.getNode());
3545  SDValue ChainIn = Node->getOperand(0);
3546  SDValue Addr = Node->getOperand(1);
3547  SDValue CmpVal = Node->getOperand(2);
3548  SDValue SwapVal = Node->getOperand(3);
3549  MachineMemOperand *MMO = Node->getMemOperand();
3550  SDLoc DL(Node);
3551 
3552  // We have native support for 32-bit and 64-bit compare and swap, but we
3553  // still need to expand extracting the "success" result from the CC.
3554  EVT NarrowVT = Node->getMemoryVT();
3555  EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
3556  if (NarrowVT == WideVT) {
3557  SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
3558  SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
3559  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
3560  DL, Tys, Ops, NarrowVT, MMO);
3561  SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
3562  SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
3563 
3564  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
3565  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
3566  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
3567  return SDValue();
3568  }
3569 
3570  // Convert 8-bit and 16-bit compare and swap to a loop, implemented
3571  // via a fullword ATOMIC_CMP_SWAPW operation.
3572  int64_t BitSize = NarrowVT.getSizeInBits();
3573  EVT PtrVT = Addr.getValueType();
3574 
3575  // Get the address of the containing word.
3576  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3577  DAG.getConstant(-4, DL, PtrVT));
3578 
3579  // Get the number of bits that the word must be rotated left in order
3580  // to bring the field to the top bits of a GR32.
3581  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3582  DAG.getConstant(3, DL, PtrVT));
3583  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3584 
3585  // Get the complementing shift amount, for rotating a field in the top
3586  // bits back to its proper position.
3587  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3588  DAG.getConstant(0, DL, WideVT), BitShift);
3589 
3590  // Construct the ATOMIC_CMP_SWAPW node.
3591  SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
3592  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
3593  NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
3594  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
3595  VTList, Ops, NarrowVT, MMO);
3596  SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
3597  SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
3598 
3599  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
3600  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
3601  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
3602  return SDValue();
3603 }
3604 
3605 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
3606  SelectionDAG &DAG) const {
3607  MachineFunction &MF = DAG.getMachineFunction();
3608  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
3609  return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
3610  SystemZ::R15D, Op.getValueType());
3611 }
3612 
3613 SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
3614  SelectionDAG &DAG) const {
3615  MachineFunction &MF = DAG.getMachineFunction();
3616  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
3617  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3618 
3619  SDValue Chain = Op.getOperand(0);
3620  SDValue NewSP = Op.getOperand(1);
3621  SDValue Backchain;
3622  SDLoc DL(Op);
3623 
3624  if (StoreBackchain) {
3625  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
3626  Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
3627  }
3628 
3629  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);
3630 
3631  if (StoreBackchain)
3632  Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
3633 
3634  return Chain;
3635 }
3636 
3637 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
3638  SelectionDAG &DAG) const {
3639  bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3640  if (!IsData)
3641  // Just preserve the chain.
3642  return Op.getOperand(0);
3643 
3644  SDLoc DL(Op);
3645  bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3646  unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
3647  auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
3648  SDValue Ops[] = {
3649  Op.getOperand(0),
3650  DAG.getConstant(Code, DL, MVT::i32),
3651  Op.getOperand(1)
3652  };
3653  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
3654  Node->getVTList(), Ops,
3655  Node->getMemoryVT(), Node->getMemOperand());
3656 }
3657 
3658 // Convert condition code in CCReg to an i32 value.
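// IPM places the 2-bit condition code in bits 29:28 of its result
// register, so shifting right by SystemZ::IPM_CC (28) leaves the raw
// CC value 0-3 in the low bits.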
3659 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
3660  SDLoc DL(CCReg);
3661  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
3662  return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
3663  DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
3664 }
3665 
3666 SDValue
3667 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3668  SelectionDAG &DAG) const {
3669  unsigned Opcode, CCValid;
3670  if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
3671  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
3672  SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
3673  SDValue CC = getCCResult(DAG, SDValue(Node, 0));
3674  DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
3675  return SDValue();
3676  }
3677 
3678  return SDValue();
3679 }
3680 
3681 SDValue
3682 SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3683  SelectionDAG &DAG) const {
3684  unsigned Opcode, CCValid;
3685  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
3686  SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
3687  if (Op->getNumValues() == 1)
3688  return getCCResult(DAG, SDValue(Node, 0));
3689  assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
3690  return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
3691  SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
3692  }
3693 
3694  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3695  switch (Id) {
3696  case Intrinsic::thread_pointer:
3697  return lowerThreadPointer(SDLoc(Op), DAG);
3698 
3699  case Intrinsic::s390_vpdi:
3700  return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
3701  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3702 
3703  case Intrinsic::s390_vperm:
3704  return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
3705  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3706 
3707  case Intrinsic::s390_vuphb:
3708  case Intrinsic::s390_vuphh:
3709  case Intrinsic::s390_vuphf:
3710  return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
3711  Op.getOperand(1));
3712 
3713  case Intrinsic::s390_vuplhb:
3714  case Intrinsic::s390_vuplhh:
3715  case Intrinsic::s390_vuplhf:
3716  return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
3717  Op.getOperand(1));
3718 
3719  case Intrinsic::s390_vuplb:
3720  case Intrinsic::s390_vuplhw:
3721  case Intrinsic::s390_vuplf:
3722  return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
3723  Op.getOperand(1));
3724 
3725  case Intrinsic::s390_vupllb:
3726  case Intrinsic::s390_vupllh:
3727  case Intrinsic::s390_vupllf:
3728  return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
3729  Op.getOperand(1));
3730 
3731  case Intrinsic::s390_vsumb:
3732  case Intrinsic::s390_vsumh:
3733  case Intrinsic::s390_vsumgh:
3734  case Intrinsic::s390_vsumgf:
3735  case Intrinsic::s390_vsumqf:
3736  case Intrinsic::s390_vsumqg:
3737  return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
3738  Op.getOperand(1), Op.getOperand(2));
3739  }
3740 
3741  return SDValue();
3742 }
3743 
3744 namespace {
3745 // Says that SystemZISD operation Opcode can be used to perform the equivalent
3746 // of a VPERM with permute vector Bytes. If Opcode takes three operands,
3747 // Operand is the constant third operand, otherwise it is the number of
3748 // bytes in each element of the result.
3749 struct Permute {
3750  unsigned Opcode;
3751  unsigned Operand;
3752  unsigned char Bytes[SystemZ::VectorBytes];
3753 };
3754 }
3755 
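// In each Bytes array below, selector values 0-15 pick bytes of the
// first operand and 16-31 bytes of the second, following the VPERM
// convention. VMRHB, for example, interleaves the operands' high
// halves byte by byte, hence its pattern 0, 16, 1, 17, ...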
3756 static const Permute PermuteForms[] = {
3757  // VMRHG
3759  { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
3760  // VMRHF
3762  { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
3763  // VMRHH
3765  { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
3766  // VMRHB
3768  { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
3769  // VMRLG
3770  { SystemZISD::MERGE_LOW, 8,
3771  { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
3772  // VMRLF
3773  { SystemZISD::MERGE_LOW, 4,
3774  { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
3775  // VMRLH
3776  { SystemZISD::MERGE_LOW, 2,
3777  { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
3778  // VMRLB
3779  { SystemZISD::MERGE_LOW, 1,
3780  { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
3781  // VPKG
3782  { SystemZISD::PACK, 4,
3783  { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
3784  // VPKF
3785  { SystemZISD::PACK, 2,
3786  { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
3787  // VPKH
3788  { SystemZISD::PACK, 1,
3789  { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
3790  // VPDI V1, V2, 4 (low half of V1, high half of V2)
3792  { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
3793  // VPDI V1, V2, 1 (high half of V1, low half of V2)
3795  { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
3796 };
3797 
3798 // Called after matching a vector shuffle against a particular pattern.
3799 // Both the original shuffle and the pattern have two vector operands.
3800 // OpNos[0] is the operand of the original shuffle that should be used for
3801 // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
3802 // OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
3803 // set OpNo0 and OpNo1 to the shuffle operands that should actually be used
3804 // for operands 0 and 1 of the pattern.
3805 static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
3806  if (OpNos[0] < 0) {
3807  if (OpNos[1] < 0)
3808  return false;
3809  OpNo0 = OpNo1 = OpNos[1];
3810  } else if (OpNos[1] < 0) {
3811  OpNo0 = OpNo1 = OpNos[0];
3812  } else {
3813  OpNo0 = OpNos[0];
3814  OpNo1 = OpNos[1];
3815  }
3816  return true;
3817 }
3818 
3819 // Bytes is a VPERM-like permute vector, except that -1 is used for
3820 // undefined bytes. Return true if the VPERM can be implemented using P.
3821 // When returning true set OpNo0 to the VPERM operand that should be
3822 // used for operand 0 of P and likewise OpNo1 for operand 1 of P.
3823 //
3824 // For example, if swapping the VPERM operands allows P to match, OpNo0
3825 // will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
3826 // operand, but rewriting it to use two duplicated operands allows it to
3827 // match P, then OpNo0 and OpNo1 will be the same.
3828 static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
3829  unsigned &OpNo0, unsigned &OpNo1) {
3830  int OpNos[] = { -1, -1 };
3831  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
3832  int Elt = Bytes[I];
3833  if (Elt >= 0) {
3834  // Make sure that the two permute vectors use the same suboperand
3835  // byte number. Only the operand numbers (the high bits) are
3836  // allowed to differ.
3837  if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
3838  return false;
3839  int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
3840  int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
3841  // Make sure that the operand mappings are consistent with previous
3842  // elements.
3843  if (OpNos[ModelOpNo] == 1 - RealOpNo)
3844  return false;
3845  OpNos[ModelOpNo] = RealOpNo;
3846  }
3847  }
3848  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
3849 }
3850 
3851 // As above, but search for a matching permute.
3852 static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
3853  unsigned &OpNo0, unsigned &OpNo1) {
3854  for (auto &P : PermuteForms)
3855  if (matchPermute(Bytes, P, OpNo0, OpNo1))
3856  return &P;
3857  return nullptr;
3858 }
3859 
3860 // Bytes is a VPERM-like permute vector, except that -1 is used for
3861 // undefined bytes. This permute is an operand of an outer permute.
3862 // See whether redistributing the -1 bytes gives a shuffle that can be
3863 // implemented using P. If so, set Transform to a VPERM-like permute vector
3864 // that, when applied to the result of P, gives the original permute in Bytes.
3865 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
3866  const Permute &P,
3867  SmallVectorImpl<int> &Transform) {
3868  unsigned To = 0;
3869  for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
3870  int Elt = Bytes[From];
3871  if (Elt < 0)
3872  // Byte number From of the result is undefined.
3873  Transform[From] = -1;
3874  else {
3875  while (P.Bytes[To] != Elt) {
3876  To += 1;
3877  if (To == SystemZ::VectorBytes)
3878  return false;
3879  }
3880  Transform[From] = To;
3881  }
3882  }
3883  return true;
3884 }
3885 
3886 // As above, but search for a matching permute.
3887 static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
3888  SmallVectorImpl<int> &Transform) {
3889  for (auto &P : PermuteForms)
3890  if (matchDoublePermute(Bytes, P, Transform))
3891  return &P;
3892  return nullptr;
3893 }
3894 
3895 // Convert the mask of the given shuffle op into a byte-level mask,
3896 // as if it had type vNi8.
3897 static bool getVPermMask(SDValue ShuffleOp,
3898  SmallVectorImpl<int> &Bytes) {
3899  EVT VT = ShuffleOp.getValueType();
3900  unsigned NumElements = VT.getVectorNumElements();
3901  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
3902 
3903  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
3904  Bytes.resize(NumElements * BytesPerElement, -1);
3905  for (unsigned I = 0; I < NumElements; ++I) {
3906  int Index = VSN->getMaskElt(I);
3907  if (Index >= 0)
3908  for (unsigned J = 0; J < BytesPerElement; ++J)
3909  Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
3910  }
3911  return true;
3912  }
3913  if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
3914  isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
3915  unsigned Index = ShuffleOp.getConstantOperandVal(1);
3916  Bytes.resize(NumElements * BytesPerElement, -1);
3917  for (unsigned I = 0; I < NumElements; ++I)
3918  for (unsigned J = 0; J < BytesPerElement; ++J)
3919  Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
3920  return true;
3921  }
3922  return false;
3923 }
3924 
3925 // Bytes is a VPERM-like permute vector, except that -1 is used for
3926 // undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
3927 // the result come from a contiguous sequence of bytes from one input.
3928 // Set Base to the selector for the first byte if so.
3929 static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
3930  unsigned BytesPerElement, int &Base) {
3931  Base = -1;
3932  for (unsigned I = 0; I < BytesPerElement; ++I) {
3933  if (Bytes[Start + I] >= 0) {
3934  unsigned Elem = Bytes[Start + I];
3935  if (Base < 0) {
3936  Base = Elem - I;
3937  // Make sure the bytes would come from one input operand.
3938  if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
3939  return false;
3940  } else if (unsigned(Base) != Elem - I)
3941  return false;
3942  }
3943  }
3944  return true;
3945 }
3946 
3947 // Bytes is a VPERM-like permute vector, except that -1 is used for
3948 // undefined bytes. Return true if it can be performed using VSLDI.
3949 // When returning true, set StartIndex to the shift amount and OpNo0
3950 // and OpNo1 to the VPERM operands that should be used as the first
3951 // and second shift operand respectively.
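// For example, Bytes == { 1, 2, ..., 15, 16 } selects bytes 1-15 of
// the first operand followed by byte 0 of the second, i.e. a
// double-wide shift left by one byte, so StartIndex becomes 1.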
3952 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
3953  unsigned &StartIndex, unsigned &OpNo0,
3954  unsigned &OpNo1) {
3955  int OpNos[] = { -1, -1 };
3956  int Shift = -1;
3957  for (unsigned I = 0; I < 16; ++I) {
3958  int Index = Bytes[I];
3959  if (Index >= 0) {
3960  int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
3961  int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
3962  int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
3963  if (Shift < 0)
3964  Shift = ExpectedShift;
3965  else if (Shift != ExpectedShift)
3966  return false;
3967  // Make sure that the operand mappings are consistent with previous
3968  // elements.
3969  if (OpNos[ModelOpNo] == 1 - RealOpNo)
3970  return false;
3971  OpNos[ModelOpNo] = RealOpNo;
3972  }
3973  }
3974  StartIndex = Shift;
3975  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
3976 }
3977 
3978 // Create a node that performs P on operands Op0 and Op1, casting the
3979 // operands to the appropriate type. The type of the result is determined by P.
3980 static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
3981  const Permute &P, SDValue Op0, SDValue Op1) {
3982  // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
3983  // elements of a PACK are twice as wide as the outputs.
3984  unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
3985  P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
3986  P.Operand);
3987  // Cast both operands to the appropriate type.
3988  MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
3989  SystemZ::VectorBytes / InBytes);
3990  Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
3991  Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
3992  SDValue Op;
3993  if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
3994  SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
3995  Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
3996  } else if (P.Opcode == SystemZISD::PACK) {
3997  MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
3998  SystemZ::VectorBytes / P.Operand);
3999  Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
4000  } else {
4001  Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
4002  }
4003  return Op;
4004 }
4005 
4006 // Bytes is a VPERM-like permute vector, except that -1 is used for
4007 // undefined bytes. Implement it on operands Ops[0] and Ops[1] using
4008 // VSLDI or VPERM.
4009 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4010  SDValue *Ops,
4011  const SmallVectorImpl<int> &Bytes) {
4012  for (unsigned I = 0; I < 2; ++I)
4013  Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
4014 
4015  // First see whether VSLDI can be used.
4016  unsigned StartIndex, OpNo0, OpNo1;
4017  if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
4018  return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
4019  Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));
4020 
4021  // Fall back on VPERM. Construct an SDNode for the permute vector.
4022  SDValue IndexNodes[SystemZ::VectorBytes];
4023  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4024  if (Bytes[I] >= 0)
4025  IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
4026  else
4027  IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4028  SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4029  return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
4030 }
4031 
4032 namespace {
4033 // Describes a general N-operand vector shuffle.
4034 struct GeneralShuffle {
4035  GeneralShuffle(EVT vt) : VT(vt) {}
4036  void addUndef();
4037  bool add(SDValue, unsigned);
4038  SDValue getNode(SelectionDAG &, const SDLoc &);
4039 
4040  // The operands of the shuffle.
4041  SmallVector<SDValue, SystemZ::VectorBytes> Ops;
4042 
4043  // Index I is -1 if byte I of the result is undefined. Otherwise the
4044  // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
4045  // Bytes[I] / SystemZ::VectorBytes.
4046  SmallVector<int, SystemZ::VectorBytes> Bytes;
4047 
4048  // The type of the shuffle result.
4049  EVT VT;
4050 };
4051 }
4052 
4053 // Add an extra undefined element to the shuffle.
4054 void GeneralShuffle::addUndef() {
4055  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4056  for (unsigned I = 0; I < BytesPerElement; ++I)
4057  Bytes.push_back(-1);
4058 }
4059 
4060 // Add an extra element to the shuffle, taking it from element Elem of Op.
4061 // A null Op indicates a vector input whose value will be calculated later;
4062 // there is at most one such input per shuffle and it always has the same
4063 // type as the result. Aborts and returns false if the source vector elements
4064 // of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Under
4065 // LLVM semantics they are implicitly extended, but this is rare and not optimized.
4066 bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
4067  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4068 
4069  // The source vector can have wider elements than the result,
4070  // either through an explicit TRUNCATE or because of type legalization.
4071  // We want the least significant part.
4072  EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
4073  unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
4074 
4075  // Return false if the source elements are smaller than their destination
4076  // elements.
4077  if (FromBytesPerElement < BytesPerElement)
4078  return false;
4079 
4080  unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
4081  (FromBytesPerElement - BytesPerElement));
4082 
4083  // Look through things like shuffles and bitcasts.
4084  while (Op.getNode()) {
4085  if (Op.getOpcode() == ISD::BITCAST)
4086  Op = Op.getOperand(0);
4087  else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
4088  // See whether the bytes we need come from a contiguous part of one
4089  // operand.
4090  SmallVector<int, SystemZ::VectorBytes> OpBytes;
4091  if (!getVPermMask(Op, OpBytes))
4092  break;
4093  int NewByte;
4094  if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
4095  break;
4096  if (NewByte < 0) {
4097  addUndef();
4098  return true;
4099  }
4100  Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
4101  Byte = unsigned(NewByte) % SystemZ::VectorBytes;
4102  } else if (Op.isUndef()) {
4103  addUndef();
4104  return true;
4105  } else
4106  break;
4107  }
4108 
4109  // Make sure that the source of the extraction is in Ops.
4110  unsigned OpNo = 0;
4111  for (; OpNo < Ops.size(); ++OpNo)
4112  if (Ops[OpNo] == Op)
4113  break;
4114  if (OpNo == Ops.size())
4115  Ops.push_back(Op);
4116 
4117  // Add the element to Bytes.
4118  unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
4119  for (unsigned I = 0; I < BytesPerElement; ++I)
4120  Bytes.push_back(Base + I);
4121 
4122  return true;
4123 }
4124 
4125 // Return SDNodes for the completed shuffle.
4126 SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
4127  assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
4128 
4129  if (Ops.size() == 0)
4130  return DAG.getUNDEF(VT);
4131 
4132  // Make sure that there are at least two shuffle operands.
4133  if (Ops.size() == 1)
4134  Ops.push_back(DAG.getUNDEF(MVT::v16i8));
4135 
4136  // Create a tree of shuffles, deferring root node until after the loop.
4137  // Try to redistribute the undefined elements of non-root nodes so that
4138  // the non-root shuffles match something like a pack or merge, then adjust
4139  // the parent node's permute vector to compensate for the new order.
4140  // Among other things, this copes with vectors like <2 x i16> that were
4141  // padded with undefined elements during type legalization.
4142  //
4143  // In the best case this redistribution will lead to the whole tree
4144  // using packs and merges. It should rarely be a loss in other cases.
4145  unsigned Stride = 1;
4146  for (; Stride * 2 < Ops.size(); Stride *= 2) {
4147  for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
4148  SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
4149 
4150  // Create a mask for just these two operands.
4151  SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
4152  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4153  unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
4154  unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
4155  if (OpNo == I)
4156  NewBytes[J] = Byte;
4157  else if (OpNo == I + Stride)
4158  NewBytes[J] = SystemZ::VectorBytes + Byte;
4159  else
4160  NewBytes[J] = -1;
4161  }
4162  // See if it would be better to reorganize NewBytes to avoid using VPERM.
4163  SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
4164  if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
4165  Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
4166  // Applying NewBytesMap to Ops[I] gets back to NewBytes.
4167  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4168  if (NewBytes[J] >= 0) {
4169  assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
4170  "Invalid double permute");
4171  Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
4172  } else
4173  assert(NewBytesMap[J] < 0 && "Invalid double permute");
4174  }
4175  } else {
4176  // Just use NewBytes on the operands.
4177  Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
4178  for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
4179  if (NewBytes[J] >= 0)
4180  Bytes[J] = I * SystemZ::VectorBytes + J;
4181  }
4182  }
4183  }
4184 
4185  // Now we just have 2 inputs. Put the second operand in Ops[1].
4186  if (Stride > 1) {
4187  Ops[1] = Ops[Stride];
4188  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4189  if (Bytes[I] >= int(SystemZ::VectorBytes))
4190  Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
4191  }
4192 
4193  // Look for an instruction that can do the permute without resorting
4194  // to VPERM.
4195  unsigned OpNo0, OpNo1;
4196  SDValue Op;
4197  if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
4198  Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
4199  else
4200  Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
4201  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4202 }
4203 
4204 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
4205 static bool isScalarToVector(SDValue Op) {
4206  for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
4207  if (!Op.getOperand(I).isUndef())
4208  return false;
4209  return true;
4210 }
4211 
4212 // Return a vector of type VT that contains Value in the first element.
4213 // The other elements don't matter.
4214 static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4215  SDValue Value) {
4216  // If we have a constant, replicate it to all elements and let the
4217  // BUILD_VECTOR lowering take care of it.
4218  if (Value.getOpcode() == ISD::Constant ||
4219  Value.getOpcode() == ISD::ConstantFP) {
4220  SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
4221  return DAG.getBuildVector(VT, DL, Ops);
4222  }
4223  if (Value.isUndef())
4224  return DAG.getUNDEF(VT);
4225  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
4226 }
4227 
4228 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in
4229 // element 1. Used for cases in which replication is cheap.
4230 static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4231  SDValue Op0, SDValue Op1) {
4232  if (Op0.isUndef()) {
4233  if (Op1.isUndef())
4234  return DAG.getUNDEF(VT);
4235  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
4236  }
4237  if (Op1.isUndef())
4238  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
4239  return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
4240  buildScalarToVector(DAG, DL, VT, Op0),
4241  buildScalarToVector(DAG, DL, VT, Op1));
4242 }
4243 
4244 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
4245 // vector for them.
4246 static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
4247  SDValue Op1) {
4248  if (Op0.isUndef() && Op1.isUndef())
4249  return DAG.getUNDEF(MVT::v2i64);
4250  // If one of the two inputs is undefined then replicate the other one,
4251  // in order to avoid using another register unnecessarily.
4252  if (Op0.isUndef())
4253  Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
4254  else if (Op1.isUndef())
4255  Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4256  else {
4257  Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4258  Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
4259  }
4260  return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
4261 }
4262 
4263 // Try to represent constant BUILD_VECTOR node BVN using a
4264 // SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask
4265 // on success.
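// For example, the v4i32 constant <0xff00ffff, 0, 0, 0xff> sets mask
// bits 15, 13 and 12 for element 0 and bit 0 for element 3, giving
// Mask == 0xb001; any byte value other than 0x00 or 0xff makes the
// conversion fail.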
4266 static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
4267  EVT ElemVT = BVN->getValueType(0).getVectorElementType();
4268  unsigned BytesPerElement = ElemVT.getStoreSize();
4269  for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
4270  SDValue Op = BVN->getOperand(I);
4271  if (!Op.isUndef()) {
4272  uint64_t Value;
4273  if (Op.getOpcode() == ISD::Constant)
4274  Value = cast<ConstantSDNode>(Op)->getZExtValue();
4275  else if (Op.getOpcode() == ISD::ConstantFP)
4276  Value = (cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
4277  .getZExtValue());
4278  else
4279  return false;
4280  for (unsigned J = 0; J < BytesPerElement; ++J) {
4281  uint64_t Byte = (Value >> (J * 8)) & 0xff;
4282  if (Byte == 0xff)
4283  Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J);
4284  else if (Byte != 0)
4285  return false;
4286  }
4287  }
4288  }
4289  return true;
4290 }
4291 
4292 // Try to load a vector constant in which BitsPerElement-bit value Value
4293 // is replicated to fill the vector. VT is the type of the resulting
4294 // constant, which may have elements of a different size from BitsPerElement.
4295 // Return the SDValue of the constant on success, otherwise return
4296 // an empty value.
4297 static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
4298  const SystemZInstrInfo *TII,
4299  const SDLoc &DL, EVT VT, uint64_t Value,
4300  unsigned BitsPerElement) {
4301  // Signed 16-bit values can be replicated using VREPI.
4302  // Mark the constants as opaque or DAGCombiner will convert back to
4303  // BUILD_VECTOR.
4304  int64_t SignedValue = SignExtend64(Value, BitsPerElement);
4305  if (isInt<16>(SignedValue)) {
4306  MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
4307  SystemZ::VectorBits / BitsPerElement);
4308  SDValue Op = DAG.getNode(
4309  SystemZISD::REPLICATE, DL, VecVT,
4310  DAG.getConstant(SignedValue, DL, MVT::i32, false, true /*isOpaque*/));
4311  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4312  }
4313  // See whether rotating the constant left some N places gives a value that
4314  // is one less than a power of 2 (i.e. all zeros followed by all ones).
4315  // If so we can use VGM.
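 // For example, the 32-bit element value 0x00ffe000 is a single run of
 // ones (bits 8 to 18 counting from the MSB) and can be generated
 // directly; a wraparound mask such as 0xe000001f, which rotates to
 // 0x000000ff, is also accepted.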
4316  unsigned Start, End;
4317  if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) {
4318  // isRxSBGMask returns the bit numbers for a full 64-bit value,
4319  // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to
4320  // bit numbers for an BitsPerElement value, so that 0 denotes
4321  // 1 << (BitsPerElement-1).
4322  Start -= 64 - BitsPerElement;
4323  End -= 64 - BitsPerElement;
4324  MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
4325  SystemZ::VectorBits / BitsPerElement);
4326  SDValue Op = DAG.getNode(
4327  SystemZISD::ROTATE_MASK, DL, VecVT,
4328  DAG.getConstant(Start, DL, MVT::i32, false, true /*isOpaque*/),
4329  DAG.getConstant(End, DL, MVT::i32, false, true /*isOpaque*/));
4330  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4331  }
4332  return SDValue();
4333 }
4334 
4335 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
4336 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
4337 // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
4338 // would benefit from this representation and return it if so.
4339 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
4340  BuildVectorSDNode *BVN) {
4341  EVT VT = BVN->getValueType(0);
4342  unsigned NumElements = VT.getVectorNumElements();
4343 
4344  // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
4345  // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
4346  // need a BUILD_VECTOR, add an additional placeholder operand for that
4347  // BUILD_VECTOR and store its operands in ResidueOps.
4348  GeneralShuffle GS(VT);
4349  SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
4350  bool FoundOne = false;
4351  for (unsigned I = 0; I < NumElements; ++I) {
4352  SDValue Op = BVN->getOperand(I);
4353  if (Op.getOpcode() == ISD::TRUNCATE)
4354  Op = Op.getOperand(0);
4355  if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4356  Op.getOperand(1).getOpcode() == ISD::Constant) {
4357  unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
4358  if (!GS.add(Op.getOperand(0), Elem))
4359  return SDValue();
4360  FoundOne = true;
4361  } else if (Op.isUndef()) {
4362  GS.addUndef();
4363  } else {
4364  if (!GS.add(SDValue(), ResidueOps.size()))
4365  return SDValue();
4366  ResidueOps.push_back(BVN->getOperand(I));
4367  }
4368  }
4369 
4370  // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
4371  if (!FoundOne)
4372  return SDValue();
4373 
4374  // Create the BUILD_VECTOR for the remaining elements, if any.
4375  if (!ResidueOps.empty()) {
4376  while (ResidueOps.size() < NumElements)
4377  ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
4378  for (auto &Op : GS.Ops) {
4379  if (!Op.getNode()) {
4380  Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
4381  break;
4382  }
4383  }
4384  }
4385  return GS.getNode(DAG, SDLoc(BVN));
4386 }
4387 
4388 // Combine GPR scalar values Elems into a vector of type VT.
4389 static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4390  SmallVectorImpl<SDValue> &Elems) {
4391  // See whether there is a single replicated value.
4392  SDValue Single;
4393  unsigned int NumElements = Elems.size();
4394  unsigned int Count = 0;
4395  for (auto Elem : Elems) {
4396  if (!Elem.isUndef()) {
4397  if (!Single.getNode())
4398  Single = Elem;
4399  else if (Elem != Single) {
4400  Single = SDValue();
4401  break;
4402  }
4403  Count += 1;
4404  }
4405  }
4406  // There are three cases here:
4407  //
4408  // - if the only defined element is a loaded one, the best sequence
4409  // is a replicating load.
4410  //
4411  // - otherwise, if the only defined element is an i64 value, we will
4412  // end up with the same VLVGP sequence regardless of whether we short-cut
4413  // for replication or fall through to the later code.
4414  //
4415  // - otherwise, if the only defined element is an i32 or smaller value,
4416  // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
4417  // This is only a win if the single defined element is used more than once.
4418  // In other cases we're better off using a single VLVGx.
4419  if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
4420  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
4421 
4422  // If all elements are loads, use VLREP/VLEs (below).
4423  bool AllLoads = true;
4424  for (auto Elem : Elems)
4425  if (Elem.getOpcode() != ISD::LOAD || cast<LoadSDNode>(Elem)->isIndexed()) {
4426  AllLoads = false;
4427  break;
4428  }
4429 
4430  // The best way of building a v2i64 from two i64s is to use VLVGP.
4431  if (VT == MVT::v2i64 && !AllLoads)
4432  return joinDwords(DAG, DL, Elems[0], Elems[1]);
4433 
4434  // Use a 64-bit merge high to combine two doubles.
4435  if (VT == MVT::v2f64 && !AllLoads)
4436  return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
4437 
4438  // Build v4f32 values directly from the FPRs:
4439  //
4440  // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
4441  // V V VMRHF
4442  // <ABxx> <CDxx>
4443  // V VMRHG
4444  // <ABCD>
4445  if (VT == MVT::v4f32 && !AllLoads) {
4446  SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
4447  SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
4448  // Avoid unnecessary undefs by reusing the other operand.
4449  if (Op01.isUndef())
4450  Op01 = Op23;
4451  else if (Op23.isUndef())
4452  Op23 = Op01;
4453  // Merging identical replications is a no-op.
4454  if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
4455  return Op01;
4456  Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
4457  Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
4458  SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
4459  DL, MVT::v2i64, Op01, Op23);
4460  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4461  }
4462 
4463  // Collect the constant terms.
4464  SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
4465  SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
4466 
4467  unsigned NumConstants = 0;
4468  for (unsigned I = 0; I < NumElements; ++I) {
4469  SDValue Elem = Elems[I];
4470  if (Elem.getOpcode() == ISD::Constant ||
4471  Elem.getOpcode() == ISD::ConstantFP) {
4472  NumConstants += 1;
4473  Constants[I] = Elem;
4474  Done[I] = true;
4475  }
4476  }
4477  // If there was at least one constant, fill in the other elements of
4478  // Constants with undefs to get a full vector constant and use that
4479  // as the starting point.
4480  SDValue Result;
4481  SDValue ReplicatedVal;
4482  if (NumConstants > 0) {
4483  for (unsigned I = 0; I < NumElements; ++I)
4484  if (!Constants[I].getNode())
4485  Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
4486  Result = DAG.getBuildVector(VT, DL, Constants);
4487  } else {
4488  // Otherwise try to use VLREP or VLVGP to start the sequence in order to
4489  // avoid a false dependency on any previous contents of the vector
4490  // register.
4491 
4492  // Use a VLREP if at least one element is a load. Make sure to replicate
4493  // the load with the most elements having its value.
4494  std::map<const SDNode*, unsigned> UseCounts;
4495  SDNode *LoadMaxUses = nullptr;
4496  for (unsigned I = 0; I < NumElements; ++I)
4497  if (Elems[I].getOpcode() == ISD::LOAD &&
4498  cast<LoadSDNode>(Elems[I])->isUnindexed()) {
4499  SDNode *Ld = Elems[I].getNode();
4500  UseCounts[Ld]++;
4501  if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
4502  LoadMaxUses = Ld;
4503  }
4504  if (LoadMaxUses != nullptr) {
4505  ReplicatedVal = SDValue(LoadMaxUses, 0);
4506  Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
4507  } else {
4508  // Try to use VLVGP.
4509  unsigned I1 = NumElements / 2 - 1;
4510  unsigned I2 = NumElements - 1;
4511  bool Def1 = !Elems[I1].isUndef();
4512  bool Def2 = !Elems[I2].isUndef();
4513  if (Def1 || Def2) {
4514  SDValue Elem1 = Elems[Def1 ? I1 : I2];
4515  SDValue Elem2 = Elems[Def2 ? I2 : I1];
4516  Result = DAG.getNode(ISD::BITCAST, DL, VT,
4517  joinDwords(DAG, DL, Elem1, Elem2));
4518  Done[I1] = true;
4519  Done[I2] = true;
4520  } else
4521  Result = DAG.getUNDEF(VT);
4522  }
4523  }
4524 
4525  // Use VLVGx to insert the other elements.
4526  for (unsigned I = 0; I < NumElements; ++I)
4527  if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
4528  Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
4529  DAG.getConstant(I, DL, MVT::i32));
4530  return Result;
4531 }
4532 
4533 SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
4534  SelectionDAG &DAG) const {
4535  const SystemZInstrInfo *TII =
4536  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
4537  auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
4538  SDLoc DL(Op);
4539  EVT VT = Op.getValueType();
4540 
4541  if (BVN->isConstant()) {
4542  // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
4543  // preferred way of creating all-zero and all-one vectors so give it
4544  // priority over other methods below.
4545  uint64_t Mask = 0;
4546  if (tryBuildVectorByteMask(BVN, Mask)) {
4547  SDValue Op = DAG.getNode(
4548  SystemZISD::BYTE_MASK, DL, MVT::v16i8,
4549  DAG.getConstant(Mask, DL, MVT::i32, false, true /*isOpaque*/));
4550  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4551  }
4552 
4553  // Try using some form of replication.
4554  APInt SplatBits, SplatUndef;
4555  unsigned SplatBitSize;
4556  bool HasAnyUndefs;
4557  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
4558  8, true) &&
4559  SplatBitSize <= 64) {
4560  // First try assuming that any undefined bits above the highest set bit
4561  // and below the lowest set bit are 1s. This increases the likelihood of
4562  // being able to use a sign-extended element value in VECTOR REPLICATE
4563  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
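 // For example, if only bits 4-7 of a 32-bit splat are defined (all
 // ones), setting every undefined bit produces 0xffffffff, which
 // VECTOR REPLICATE IMMEDIATE can materialize as -1.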
4564  uint64_t SplatBitsZ = SplatBits.getZExtValue();
4565  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
4566  uint64_t Lower = (SplatUndefZ
4567  & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
4568  uint64_t Upper = (SplatUndefZ
4569  & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
4570  uint64_t Value = SplatBitsZ | Upper | Lower;
4571  SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value,
4572  SplatBitSize);
4573  if (Op.getNode())
4574  return Op;
4575 
4576  // Now try assuming that any undefined bits between the first and
4577  // last defined set bits are set. This increases the chances of
4578  // using a non-wraparound mask.
4579  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
4580  Value = SplatBitsZ | Middle;
4581  Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize);
4582  if (Op.getNode())
4583  return Op;
4584  }
4585 
4586  // Fall back to loading it from memory.
4587  return SDValue();
4588  }
4589 
4590  // See if we should use shuffles to construct the vector from other vectors.
4591  if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
4592  return Res;
4593 
4594  // Detect SCALAR_TO_VECTOR conversions.
4595  if (isScalarToVector(Op))
4596  return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
4597 
4598  // Otherwise use buildVector to build the vector up from GPRs.
4599  unsigned NumElements = Op.getNumOperands();
4600  SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
4601  for (unsigned I = 0; I < NumElements; ++I)
4602  Ops[I] = Op.getOperand(I);
4603  return buildVector(DAG, DL, VT, Ops);
4604 }
4605 
4606 SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
4607  SelectionDAG &DAG) const {
4608  auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
4609  SDLoc DL(Op);
4610  EVT VT = Op.getValueType();
4611  unsigned NumElements = VT.getVectorNumElements();
4612 
4613  if (VSN->isSplat()) {
4614  SDValue Op0 = Op.getOperand(0);
4615  unsigned Index = VSN->getSplatIndex();
4616  assert(Index < VT.getVectorNumElements() &&
4617  "Splat index should be defined and in first operand");
4618  // See whether the value we're splatting is directly available as a scalar.
4619  if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
4620  Op0.getOpcode() == ISD::BUILD_VECTOR)
4621  return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
4622  // Otherwise keep it as a vector-to-vector operation.
4623  return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
4624  DAG.getConstant(Index, DL, MVT::i32));
4625  }
4626 
4627  GeneralShuffle GS(VT);
4628  for (unsigned I = 0; I < NumElements; ++I) {
4629  int Elt = VSN->getMaskElt(I);
4630  if (Elt < 0)
4631  GS.addUndef();
4632  else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
4633  unsigned(Elt) % NumElements))
4634  return SDValue();
4635  }
4636  return GS.getNode(DAG, SDLoc(VSN));
4637 }
4638 
4639 SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
4640  SelectionDAG &DAG) const {
4641  SDLoc DL(Op);
4642  // Just insert the scalar into element 0 of an undefined vector.
4643  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
4644  Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
4645  Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
4646 }
4647 
4648 SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
4649  SelectionDAG &DAG) const {
4650  // Handle insertions of floating-point values.
4651  SDLoc DL(Op);
4652  SDValue Op0 = Op.getOperand(0);
4653  SDValue Op1 = Op.getOperand(1);
4654  SDValue Op2 = Op.getOperand(2);
4655  EVT VT = Op.getValueType();
4656 
4657  // Insertions into constant indices of a v2f64 can be done using VPDI.
4658  // However, if the inserted value is a bitcast or a constant then it's
4659  // better to use GPRs, as below.
4660  if (VT == MVT::v2f64 &&
4661  Op1.getOpcode() != ISD::BITCAST &&
4662  Op1.getOpcode() != ISD::ConstantFP &&
4663  Op2.getOpcode() == ISD::Constant) {
4664  uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
4665  unsigned Mask = VT.getVectorNumElements() - 1;
4666  if (Index <= Mask)
4667  return Op;
4668  }
4669 
4670  // Otherwise bitcast to the equivalent integer form and insert via a GPR.
4671  MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
4672  MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
4673  SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
4674  DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
4675  DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
4676  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
4677 }
4678 
4679 SDValue
4680 SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
4681  SelectionDAG &DAG) const {
4682  // Handle extractions of floating-point values.
4683  SDLoc DL(Op);
4684  SDValue Op0 = Op.getOperand(0);
4685  SDValue Op1 = Op.getOperand(1);
4686  EVT VT = Op.getValueType();
4687  EVT VecVT = Op0.getValueType();
4688 
4689  // Extractions of constant indices can be done directly.
4690  if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
4691  uint64_t Index = CIndexN->getZExtValue();
4692  unsigned Mask = VecVT.getVectorNumElements() - 1;
4693  if (Index <= Mask)
4694  return Op;
4695  }
4696 
4697  // Otherwise bitcast to the equivalent integer form and extract via a GPR.
4698  MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
4699  MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
4700  SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
4701  DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
4702  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
4703 }
4704 
4705 SDValue
4706 SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
4707  unsigned UnpackHigh) const {
4708  SDValue PackedOp = Op.getOperand(0);
4709  EVT OutVT = Op.getValueType();
4710  EVT InVT = PackedOp.getValueType();
4711  unsigned ToBits = OutVT.getScalarSizeInBits();
4712  unsigned FromBits = InVT.getScalarSizeInBits();
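 // For example, extending from 8-bit to 32-bit elements unpacks twice:
 // v16i8 -> v8i16 -> v4i32, doubling the element width each time.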
4713  do {
4714  FromBits *= 2;
4715  EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
4716  SystemZ::VectorBits / FromBits);
4717  PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
4718  } while (FromBits != ToBits);
4719  return PackedOp;
4720 }
4721 
4722 SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
4723  unsigned ByScalar) const {
4724  // Look for cases where a vector shift can use the *_BY_SCALAR form.
4725  SDValue Op0 = Op.getOperand(0);
4726  SDValue Op1 = Op.getOperand(1);
4727  SDLoc DL(Op);
4728  EVT VT = Op.getValueType();
4729  unsigned ElemBitSize = VT.getScalarSizeInBits();
4730 
4731  // See whether the shift vector is a splat represented as BUILD_VECTOR.
4732  if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
4733  APInt SplatBits, SplatUndef;
4734  unsigned SplatBitSize;
4735  bool HasAnyUndefs;
4736  // Check for constant splats. Use ElemBitSize as the minimum element
4737  // width and reject splats that need wider elements.
4738  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
4739  ElemBitSize, true) &&
4740  SplatBitSize == ElemBitSize) {
4741  SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
4742  DL, MVT::i32);
4743  return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
4744  }
4745  // Check for variable splats.
4746  BitVector UndefElements;
4747  SDValue Splat = BVN->getSplatValue(&UndefElements);
4748  if (Splat) {
4749  // Since i32 is the smallest legal type, we either need a no-op
4750  // or a truncation.
4751  SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
4752  return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
4753  }
4754  }
4755 
4756  // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
4757  // and the shift amount is directly available in a GPR.
4758  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
4759  if (VSN->isSplat()) {
4760  SDValue VSNOp0 = VSN->getOperand(0);
4761  unsigned Index = VSN->getSplatIndex();
4762  assert(Index < VT.getVectorNumElements() &&
4763  "Splat index should be defined and in first operand");
4764  if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
4765  VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
4766  // Since i32 is the smallest legal type, we either need a no-op
4767  // or a truncation.
4768  SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
4769  VSNOp0.getOperand(Index));
4770  return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
4771  }
4772  }
4773  }
4774 
4775  // Otherwise just treat the current form as legal.
4776  return Op;
4777 }
4778 
4779 SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
4780  SelectionDAG &DAG) const {
4781  switch (Op.getOpcode()) {
4782  case ISD::FRAMEADDR:
4783  return lowerFRAMEADDR(Op, DAG);
4784  case ISD::RETURNADDR:
4785  return lowerRETURNADDR(Op, DAG);
4786  case ISD::BR_CC:
4787  return lowerBR_CC(Op, DAG);
4788  case ISD::SELECT_CC:
4789  return lowerSELECT_CC(Op, DAG);
4790  case ISD::SETCC:
4791  return lowerSETCC(Op, DAG);
4792  case ISD::GlobalAddress:
4793  return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
4794  case ISD::GlobalTLSAddress:
4795  return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
4796  case ISD::BlockAddress:
4797  return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
4798  case ISD::JumpTable:
4799  return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
4800  case ISD::ConstantPool:
4801  return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
4802  case ISD::BITCAST:
4803  return lowerBITCAST(Op, DAG);
4804  case ISD::VASTART:
4805  return lowerVASTART(Op, DAG);
4806  case ISD::VACOPY:
4807  return lowerVACOPY(Op, DAG);
4808  case ISD::DYNAMIC_STACKALLOC:
4809  return lowerDYNAMIC_STACKALLOC(Op, DAG);
4810  case ISD::GET_DYNAMIC_AREA_OFFSET:
4811  return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
4812  case ISD::SMUL_LOHI:
4813  return lowerSMUL_LOHI(Op, DAG);
4814  case ISD::UMUL_LOHI:
4815  return lowerUMUL_LOHI(Op, DAG);
4816  case ISD::SDIVREM:
4817  return lowerSDIVREM(Op, DAG);
4818  case ISD::UDIVREM:
4819  return lowerUDIVREM(Op, DAG);
4820  case ISD::SADDO:
4821  case ISD::SSUBO:
4822  case ISD::UADDO:
4823  case ISD::USUBO:
4824  return lowerXALUO(Op, DAG);
4825  case ISD::ADDCARRY:
4826  case ISD::SUBCARRY:
4827  return lowerADDSUBCARRY(Op, DAG);
4828  case ISD::OR:
4829  return lowerOR(Op, DAG);
4830  case ISD::CTPOP:
4831  return lowerCTPOP(Op, DAG);
4832  case ISD::ATOMIC_FENCE:
4833  return lowerATOMIC_FENCE(Op, DAG);
4834  case ISD::ATOMIC_SWAP:
4835  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
4836  case ISD::ATOMIC_STORE:
4837  return lowerATOMIC_STORE(Op, DAG);
4838  case ISD::ATOMIC_LOAD:
4839  return lowerATOMIC_LOAD(Op, DAG);
4840  case ISD::ATOMIC_LOAD_ADD:
4841  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
4842  case ISD::ATOMIC_LOAD_SUB:
4843  return lowerATOMIC_LOAD_SUB(Op, DAG);
4844  case ISD::ATOMIC_LOAD_AND:
4845  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
4846  case ISD::ATOMIC_LOAD_OR:
4847  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
4848  case ISD::ATOMIC_LOAD_XOR:
4849  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
4850  case ISD::ATOMIC_LOAD_NAND:
4851  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
4852  case ISD::ATOMIC_LOAD_MIN:
4853  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
4854  case ISD::ATOMIC_LOAD_MAX:
4855  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
4856  case ISD::ATOMIC_LOAD_UMIN:
4857  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
4858  case ISD::ATOMIC_LOAD_UMAX:
4859  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
4860  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
4861  return lowerATOMIC_CMP_SWAP(Op, DAG);
4862  case ISD::STACKSAVE:
4863  return lowerSTACKSAVE(Op, DAG);
4864  case ISD::STACKRESTORE:
4865  return lowerSTACKRESTORE(Op, DAG);
4866  case ISD::PREFETCH:
4867  return lowerPREFETCH(Op, DAG);
4868  case ISD::INTRINSIC_W_CHAIN:
4869  return lowerINTRINSIC_W_CHAIN(Op, DAG);
4870  case ISD::INTRINSIC_WO_CHAIN:
4871  return lowerINTRINSIC_WO_CHAIN(Op, DAG);
4872  case ISD::BUILD_VECTOR:
4873  return lowerBUILD_VECTOR(Op, DAG);
4874  case ISD::VECTOR_SHUFFLE:
4875  return lowerVECTOR_SHUFFLE(Op, DAG);
4876  case ISD::SCALAR_TO_VECTOR:
4877  return lowerSCALAR_TO_VECTOR(Op, DAG);
4878  case ISD::INSERT_VECTOR_ELT:
4879  return lowerINSERT_VECTOR_ELT(Op, DAG);
4880  case ISD::EXTRACT_VECTOR_ELT:
4881  return lowerEXTRACT_VECTOR_ELT(Op, DAG);
4882  case ISD::SIGN_EXTEND_VECTOR_INREG:
4883  return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
4884  case ISD::ZERO_EXTEND_VECTOR_INREG:
4885  return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
4886  case ISD::SHL:
4887  return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
4888  case ISD::SRL:
4889  return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
4890  case ISD::SRA:
4891  return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
4892  default:
4893  llvm_unreachable("Unexpected node to lower");
4894  }
4895 }
4896 
4897 // Lower operations with invalid operand or result types (currently used
4898 // only for 128-bit integer types).
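// Note: an i128 in this context is modeled as an even/odd 64-bit GPR pair
// (MVT::Untyped); subreg_h64 names the high (even) half and subreg_l64 the
// low (odd) half, which is why PAIR128 below is built with Hi before Lo.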
4899 
4900 static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
4901  SDLoc DL(In);
4902  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
4903  DAG.getIntPtrConstant(0, DL));
4904  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
4905  DAG.getIntPtrConstant(1, DL));
4906  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
4907  MVT::Untyped, Hi, Lo);
4908  return SDValue(Pair, 0);
4909 }
4910 
4911 static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
4912  SDLoc DL(In);
4913  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
4914  DL, MVT::i64, In);
4915  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
4916  DL, MVT::i64, In);
4917  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
4918 }
4919 
4920 void
4921 SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
4922  SmallVectorImpl<SDValue> &Results,
4923  SelectionDAG &DAG) const {
4924  switch (N->getOpcode()) {
4925  case ISD::ATOMIC_LOAD: {
4926  SDLoc DL(N);
4927  SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
4928  SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
4929  MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
4930  SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
4931  DL, Tys, Ops, MVT::i128, MMO);
4932  Results.push_back(lowerGR128ToI128(DAG, Res));
4933  Results.push_back(Res.getValue(1));
4934  break;
4935  }
4936  case ISD::ATOMIC_STORE: {
4937  SDLoc DL(N);
4938  SDVTList Tys = DAG.getVTList(MVT::Other);
4939  SDValue Ops[] = { N->getOperand(0),
4940  lowerI128ToGR128(DAG, N->getOperand(2)),
4941  N->getOperand(1) };
4942  MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
4943  SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
4944  DL, Tys, Ops, MVT::i128, MMO);
4945  // We have to enforce sequential consistency by performing a
4946  // serialization operation after the store.
4947  if (cast<AtomicSDNode>(N)->getOrdering() ==
4948  AtomicOrdering::SequentiallyConsistent)
4949  Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
4950  MVT::Other, Res), 0);
4951  Results.push_back(Res);
4952  break;
4953  }
4954  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
4955  SDLoc DL(N);
4956  SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
4957  SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
4958  lowerI128ToGR128(DAG, N->getOperand(2)),
4959  lowerI128ToGR128(DAG, N->getOperand(3)) };
4960  MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
4961  SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
4962  DL, Tys, Ops, MVT::i128, MMO);
4963  SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
4964  SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4965  Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
4966  Results.push_back(lowerGR128ToI128(DAG, Res));
4967  Results.push_back(Success);
4968  Results.push_back(Res.getValue(2));
4969  break;
4970  }
4971  default:
4972  llvm_unreachable("Unexpected node to lower");
4973  }
4974 }
4975 
4976 void
4977 SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
4978  SmallVectorImpl<SDValue> &Results,
4979  SelectionDAG &DAG) const {
4980  return LowerOperationWrapper(N, Results, DAG);
4981 }
4982 
4983 const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
4984 #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
4985  switch ((SystemZISD::NodeType)Opcode) {
4986  case SystemZISD::FIRST_NUMBER: break;
4987  OPCODE(RET_FLAG);
4988  OPCODE(CALL);
4989  OPCODE(SIBCALL);
4990  OPCODE(TLS_GDCALL);
4991  OPCODE(TLS_LDCALL);
4992  OPCODE(PCREL_WRAPPER);
4993  OPCODE(PCREL_OFFSET);
4994  OPCODE(IABS);
4995  OPCODE(ICMP);
4996  OPCODE(FCMP);
4997  OPCODE(TM);
4998  OPCODE(BR_CCMASK);
4999  OPCODE(SELECT_CCMASK);
5000  OPCODE(ADJDYNALLOC);
5001  OPCODE(POPCNT);
5002  OPCODE(SMUL_LOHI);
5003  OPCODE(UMUL_LOHI);
5004  OPCODE(SDIVREM);
5005  OPCODE(UDIVREM);
5006  OPCODE(SADDO);
5007  OPCODE(SSUBO);
5008  OPCODE(UADDO);
5009  OPCODE(USUBO);
5010  OPCODE(ADDCARRY);
5011  OPCODE(SUBCARRY);
5012  OPCODE(GET_CCMASK);
5013  OPCODE(MVC);
5014  OPCODE(MVC_LOOP);
5015  OPCODE(NC);
5016  OPCODE(NC_LOOP);
5017  OPCODE(OC);
5018  OPCODE(OC_LOOP);
5019  OPCODE(XC);
5020  OPCODE(XC_LOOP);
5021  OPCODE(CLC);
5022  OPCODE(CLC_LOOP);
5023  OPCODE(STPCPY);
5024  OPCODE(STRCMP);
5025  OPCODE(SEARCH_STRING);
5026  OPCODE(IPM);
5027  OPCODE(MEMBARRIER);
5028  OPCODE(TBEGIN);
5029  OPCODE(TBEGIN_NOFLOAT);
5030  OPCODE(TEND);
5031  OPCODE(BYTE_MASK);
5032  OPCODE(ROTATE_MASK);
5033  OPCODE(REPLICATE);
5034  OPCODE(JOIN_DWORDS);
5035  OPCODE(SPLAT);
5036  OPCODE(MERGE_HIGH);
5037  OPCODE(MERGE_LOW);
5038  OPCODE(SHL_DOUBLE);
5039  OPCODE(PERMUTE_DWORDS);
5040  OPCODE(PERMUTE);
5041  OPCODE(PACK);
5042  OPCODE(PACKS_CC);
5043  OPCODE(PACKLS_CC);
5044  OPCODE(UNPACK_HIGH);
5045  OPCODE(UNPACKL_HIGH);
5046  OPCODE(UNPACK_LOW);
5047  OPCODE(UNPACKL_LOW);
5048  OPCODE(VSHL_BY_SCALAR);
5049  OPCODE(VSRL_BY_SCALAR);
5050  OPCODE(VSRA_BY_SCALAR);
5051  OPCODE(VSUM);
5052  OPCODE(VICMPE);
5053  OPCODE(VICMPH);
5054  OPCODE(VICMPHL);
5055  OPCODE(VICMPES);
5056  OPCODE(VICMPHS);
5057  OPCODE(VICMPHLS);
5058  OPCODE(VFCMPE);
5059  OPCODE(VFCMPH);
5060  OPCODE(VFCMPHE);
5061  OPCODE(VFCMPES);
5062  OPCODE(VFCMPHS);
5063  OPCODE(VFCMPHES);
5064  OPCODE(VFTCI);
5065  OPCODE(VEXTEND);
5066  OPCODE(VROUND);
5067  OPCODE(VTM);
5068  OPCODE(VFAE_CC);
5069  OPCODE(VFAEZ_CC);
5070  OPCODE(VFEE_CC);
5071  OPCODE(VFEEZ_CC);
5072  OPCODE(VFENE_CC);
5073  OPCODE(VFENEZ_CC);
5074  OPCODE(VISTR_CC);
5075  OPCODE(VSTRC_CC);
5076  OPCODE(VSTRCZ_CC);
5077  OPCODE(TDC);
5078  OPCODE(ATOMIC_SWAPW);
5079  OPCODE(ATOMIC_LOADW_ADD);
5080  OPCODE(ATOMIC_LOADW_SUB);
5081  OPCODE(ATOMIC_LOADW_AND);
5082  OPCODE(ATOMIC_LOADW_OR);
5083  OPCODE(ATOMIC_LOADW_XOR);
5084  OPCODE(ATOMIC_LOADW_NAND);
5085  OPCODE(ATOMIC_LOADW_MIN);
5086  OPCODE(ATOMIC_LOADW_MAX);
5087  OPCODE(ATOMIC_LOADW_UMIN);
5088  OPCODE(ATOMIC_LOADW_UMAX);
5089  OPCODE(ATOMIC_CMP_SWAPW);
5090  OPCODE(ATOMIC_CMP_SWAP);
5091  OPCODE(ATOMIC_LOAD_128);
5092  OPCODE(ATOMIC_STORE_128);
5093  OPCODE(ATOMIC_CMP_SWAP_128);
5094  OPCODE(LRV);
5095  OPCODE(STRV);
5096  OPCODE(PREFETCH);
5097  }
5098  return nullptr;
5099 #undef OPCODE
5100 }
5101 
5102 // Return true if VT is a vector whose elements are a whole number of bytes
5103 // in width. Also check for presence of vector support.
5104 bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
5105  if (!Subtarget.hasVector())
5106  return false;
5107 
5108  return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
5109 }
5110 
5111 // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
5112 // producing a result of type ResVT. Op is a possibly bitcast version
5113 // of the input vector and Index is the index (based on type VecVT) that
5114 // should be extracted. Return the new extraction if a simplification
5115 // was possible or if Force is true.
5116 SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
5117  EVT VecVT, SDValue Op,
5118  unsigned Index,
5119  DAGCombinerInfo &DCI,
5120  bool Force) const {
5121  SelectionDAG &DAG = DCI.DAG;
5122 
5123  // The number of bytes being extracted.
5124  unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
5125 
5126  for (;;) {
5127  unsigned Opcode = Op.getOpcode();
5128  if (Opcode == ISD::BITCAST)
5129  // Look through bitcasts.
5130  Op = Op.getOperand(0);
5131  else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
5132  canTreatAsByteVector(Op.getValueType())) {
5133  // Get a VPERM-like permute mask and see whether the bytes covered
5134  // by the extracted element are a contiguous sequence from one
5135  // source operand.
5136  SmallVector<int, SystemZ::VectorBytes> Bytes;
5137  if (!getVPermMask(Op, Bytes))
5138  break;
5139  int First;
5140  if (!getShuffleInput(Bytes, Index * BytesPerElement,
5141  BytesPerElement, First))
5142  break;
5143  if (First < 0)
5144  return DAG.getUNDEF(ResVT);
5145  // Make sure the contiguous sequence starts at a multiple of the
5146  // original element size.
5147  unsigned Byte = unsigned(First) % Bytes.size();
5148  if (Byte % BytesPerElement != 0)
5149  break;
5150  // We can get the extracted value directly from an input.
5151  Index = Byte / BytesPerElement;
5152  Op = Op.getOperand(unsigned(First) / Bytes.size());
5153  Force = true;
5154  } else if (Opcode == ISD::BUILD_VECTOR &&
5155  canTreatAsByteVector(Op.getValueType())) {
5156  // We can only optimize this case if the BUILD_VECTOR elements are
5157  // at least as wide as the extracted value.
5158  EVT OpVT = Op.getValueType();
5159  unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
5160  if (OpBytesPerElement < BytesPerElement)
5161  break;
5162  // Make sure that the least-significant bit of the extracted value
5163  // is the least significant bit of an input.
5164  unsigned End = (Index + 1) * BytesPerElement;
5165  if (End % OpBytesPerElement != 0)
5166  break;
5167  // We're extracting the low part of one operand of the BUILD_VECTOR.
5168  Op = Op.getOperand(End / OpBytesPerElement - 1);
5169  if (!Op.getValueType().isInteger()) {
5170  EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
5171  Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
5172  DCI.AddToWorklist(Op.getNode());
5173  }
5174  EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
5175  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
5176  if (VT != ResVT) {
5177  DCI.AddToWorklist(Op.getNode());
5178  Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
5179  }
5180  return Op;
5181  } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
5182  Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
5183  Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5184  canTreatAsByteVector(Op.getValueType()) &&
5185  canTreatAsByteVector(Op.getOperand(0).getValueType())) {
5186  // Make sure that only the unextended bits are significant.
5187  EVT ExtVT = Op.getValueType();
5188  EVT OpVT = Op.getOperand(0).getValueType();
5189  unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
5190  unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
5191  unsigned Byte = Index * BytesPerElement;
5192  unsigned SubByte = Byte % ExtBytesPerElement;
5193  unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
5194  if (SubByte < MinSubByte ||
5195  SubByte + BytesPerElement > ExtBytesPerElement)
5196  break;
5197  // Get the byte offset of the unextended element
5198  Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
5199  // ...then add the byte offset relative to that element.
5200  Byte += SubByte - MinSubByte;
5201  if (Byte % BytesPerElement != 0)
5202  break;
5203  Op = Op.getOperand(0);
5204  Index = Byte / BytesPerElement;
5205  Force = true;
5206  } else
5207  break;
5208  }
5209  if (Force) {
5210  if (Op.getValueType() != VecVT) {
5211  Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
5212  DCI.AddToWorklist(Op.getNode());
5213  }
5214  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
5215  DAG.getConstant(Index, DL, MVT::i32));
5216  }
5217  return SDValue();
5218 }
5219 
5220 // Optimize vector operations in scalar value Op on the basis that Op
5221 // is truncated to TruncVT.
5222 SDValue SystemZTargetLowering::combineTruncateExtract(
5223  const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
5224  // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
5225  // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
5226  // of type TruncVT.
5227  if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5228  TruncVT.getSizeInBits() % 8 == 0) {
5229  SDValue Vec = Op.getOperand(0);
5230  EVT VecVT = Vec.getValueType();
5231  if (canTreatAsByteVector(VecVT)) {
5232  if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
5233  unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
5234  unsigned TruncBytes = TruncVT.getStoreSize();
5235  if (BytesPerElement % TruncBytes == 0) {
5236  // Calculate the value of Y' in the above description. We are
5237  // splitting the original elements into Scale equal-sized pieces
5238  // and for truncation purposes want the last (least-significant)
5239  // of these pieces for IndexN. This is easiest to do by calculating
5240  // the start index of the following element and then subtracting 1.
5241  unsigned Scale = BytesPerElement / TruncBytes;
5242  unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
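  // For example, truncating element 1 of a v2i64 to i32 gives Scale = 2 and
  // NewIndex = (1 + 1) * 2 - 1 = 3: the least-significant i32 piece of that
  // element when the vector is reinterpreted as v4i32 (SystemZ element order
  // is big-endian).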
5243 
5244  // Defer the creation of the bitcast from X to combineExtract,
5245  // which might be able to optimize the extraction.
5246  VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
5247  VecVT.getStoreSize() / TruncBytes);
5248  EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
5249  return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
5250  }
5251  }
5252  }
5253  }
5254  return SDValue();
5255 }
5256 
5257 SDValue SystemZTargetLowering::combineZERO_EXTEND(
5258  SDNode *N, DAGCombinerInfo &DCI) const {
5259  // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
5260  SelectionDAG &DAG = DCI.DAG;
5261  SDValue N0 = N->getOperand(0);
5262  EVT VT = N->getValueType(0);
5263  if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
5264  auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
5265  auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5266  if (TrueOp && FalseOp) {
5267  SDLoc DL(N0);
5268  SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
5269  DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
5270  N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
5271  SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
5272  // If N0 has multiple uses, change other uses as well.
5273  if (!N0.hasOneUse()) {
5274  SDValue TruncSelect =
5275  DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
5276  DCI.CombineTo(N0.getNode(), TruncSelect);
5277  }
5278  return NewSelect;
5279  }
5280  }
5281  return SDValue();
5282 }
5283 
5284 SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
5285  SDNode *N, DAGCombinerInfo &DCI) const {
5286  // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
5287  // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
5288  // into (select_cc LHS, RHS, -1, 0, COND)
5289  SelectionDAG &DAG = DCI.DAG;
5290  SDValue N0 = N->getOperand(0);
5291  EVT VT = N->getValueType(0);
5292  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
5293  if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
5294  N0 = N0.getOperand(0);
5295  if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
5296  SDLoc DL(N0);
5297  SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
5298  DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
5299  N0.getOperand(2) };
5300  return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
5301  }
5302  return SDValue();
5303 }
5304 
5305 SDValue SystemZTargetLowering::combineSIGN_EXTEND(
5306  SDNode *N, DAGCombinerInfo &DCI) const {
5307  // Convert (sext (ashr (shl X, C1), C2)) to
5308  // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
5309  // cheap as narrower ones.
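  // For example, with X of type i32 extended to i64, (sext (ashr (shl X, 24),
  // 24)) becomes (ashr (shl (anyext X), 56), 56): Extra below is 64 - 32 = 32
  // and is added to both shift counts.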
5310  SelectionDAG &DAG = DCI.DAG;
5311  SDValue N0 = N->getOperand(0);
5312  EVT VT = N->getValueType(0);
5313  if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
5314  auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5315  SDValue Inner = N0.getOperand(0);
5316  if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
5317  if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
5318  unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
5319  unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
5320  unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
5321  EVT ShiftVT = N0.getOperand(1).getValueType();
5322  SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
5323  Inner.getOperand(0));
5324  SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
5325  DAG.getConstant(NewShlAmt, SDLoc(Inner),
5326  ShiftVT));
5327  return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
5328  DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
5329  }
5330  }
5331  }
5332  return SDValue();
5333 }
5334 
5335 SDValue SystemZTargetLowering::combineMERGE(
5336  SDNode *N, DAGCombinerInfo &DCI) const {
5337  SelectionDAG &DAG = DCI.DAG;
5338  unsigned Opcode = N->getOpcode();
5339  SDValue Op0 = N->getOperand(0);
5340  SDValue Op1 = N->getOperand(1);
5341  if (Op0.getOpcode() == ISD::BITCAST)
5342  Op0 = Op0.getOperand(0);
5343  if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
5344  cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
5345  // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
5346  // for v4f32.
5347  if (Op1 == N->getOperand(0))
5348  return Op1;
5349  // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
5350  EVT VT = Op1.getValueType();
5351  unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
5352  if (ElemBytes <= 4) {
5353  Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
5354  SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
5355  EVT InVT = VT.changeVectorElementTypeToInteger();
5356  EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
5357  SystemZ::VectorBytes / ElemBytes / 2);
5358  if (VT != InVT) {
5359  Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
5360  DCI.AddToWorklist(Op1.getNode());
5361  }
5362  SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
5363  DCI.AddToWorklist(Op.getNode());
5364  return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
5365  }
5366  }
5367  return SDValue();
5368 }
5369 
5370 SDValue SystemZTargetLowering::combineLOAD(
5371  SDNode *N, DAGCombinerInfo &DCI) const {
5372  SelectionDAG &DAG = DCI.DAG;
5373  EVT LdVT = N->getValueType(0);
5374  if (LdVT.isVector() || LdVT.isInteger())
5375  return SDValue();
5376  // Transform a scalar load that is REPLICATEd as well as having other
5377  // use(s) to the form where the other use(s) use the first element of the
5378  // REPLICATE instead of the load. Otherwise instruction selection will not
5379  // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
5380  // point loads.
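 // For example, an f64 load feeding both a REPLICATE and a scalar use: once
 // the scalar use reads element 0 of the REPLICATE, the load's only direct
 // value user is the REPLICATE, matching the pattern that selects a single
 // VLREP.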
5381 
5382  SDValue Replicate;
5383  SmallVector<SDNode*, 8> OtherUses;
5384  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5385  UI != UE; ++UI) {
5386  if (UI->getOpcode() == SystemZISD::REPLICATE) {
5387  if (Replicate)
5388  return SDValue(); // Should never happen
5389  Replicate = SDValue(*UI, 0);
5390  }
5391  else if (UI.getUse().getResNo() == 0)
5392  OtherUses.push_back(*UI);
5393  }
5394  if (!Replicate || OtherUses.empty())
5395  return SDValue();
5396 
5397  SDLoc DL(N);
5398  SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
5399  Replicate, DAG.getConstant(0, DL, MVT::i32));
5400  // Update uses of the loaded Value while preserving old chains.
5401  for (SDNode *U : OtherUses) {
5402  SmallVector<SDValue, 8> Ops;
5403  for (SDValue Op : U->ops())
5404  Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
5405  DAG.UpdateNodeOperands(U, Ops);
5406  }
5407  return SDValue(N, 0);
5408 }
5409 
5410 SDValue SystemZTargetLowering::combineSTORE(
5411  SDNode *N, DAGCombinerInfo &DCI) const {
5412  SelectionDAG &DAG = DCI.DAG;
5413  auto *SN = cast<StoreSDNode>(N);
5414  auto &Op1 = N->getOperand(1);
5415  EVT MemVT = SN->getMemoryVT();
5416  // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
5417  // for the extraction to be done on a vMiN value, so that we can use VSTE.
5418  // If X has wider elements then convert it to:
5419  // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
5420  if (MemVT.isInteger() && SN->isTruncatingStore()) {
5421  if (SDValue Value =
5422  combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
5423  DCI.AddToWorklist(Value.getNode());
5424 
5425  // Rewrite the store with the new form of stored value.
5426  return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
5427  SN->getBasePtr(), SN->getMemoryVT(),
5428  SN->getMemOperand());
5429  }
5430  }
5431  // Combine STORE (BSWAP) into STRVH/STRV/STRVG
5432  if (!SN->isTruncatingStore() &&
5433  Op1.getOpcode() == ISD::BSWAP &&
5434  Op1.getNode()->hasOneUse() &&
5435  (Op1.getValueType() == MVT::i16 ||
5436  Op1.getValueType() == MVT::i32 ||
5437  Op1.getValueType() == MVT::i64)) {
5438 
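  // BSwapOp is the value *before* the byte swap; the STRV-family stores
  // reverse the bytes themselves, so the BSWAP node is folded away here.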
5439  SDValue BSwapOp = Op1.getOperand(0);
5440 
5441  if (BSwapOp.getValueType() == MVT::i16)
5442  BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
5443 
5444  SDValue Ops[] = {
5445  N->getOperand(0), BSwapOp, N->getOperand(2)
5446  };
5447 
5448  return
5449  DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
5450  Ops, MemVT, SN->getMemOperand());
5451  }
5452  return SDValue();
5453 }
5454 
5455 SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
5456  SDNode *N, DAGCombinerInfo &DCI) const {
5457 
5458  if (!Subtarget.hasVector())
5459  return SDValue();
5460 
5461  // Try to simplify a vector extraction.
5462  if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
5463  SDValue Op0 = N->getOperand(0);
5464  EVT VecVT = Op0.getValueType();
5465  return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
5466  IndexN->getZExtValue(), DCI, false);
5467  }
5468  return SDValue();
5469 }
5470 
5471 SDValue SystemZTargetLowering::combineJOIN_DWORDS(
5472  SDNode *N, DAGCombinerInfo &DCI) const {
5473  SelectionDAG &DAG = DCI.DAG;
5474  // (join_dwords X, X) == (replicate X)
5475  if (N->getOperand(0) == N->getOperand(1))
5476  return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
5477  N->getOperand(0));
5478  return SDValue();
5479 }
5480 
5481 SDValue SystemZTargetLowering::combineFP_ROUND(
5482  SDNode *N, DAGCombinerInfo &DCI) const {
5483  // (fpround (extract_vector_elt X 0))
5484  // (fpround (extract_vector_elt X 1)) ->
5485  // (extract_vector_elt (VROUND X) 0)
5486  // (extract_vector_elt (VROUND X) 2)
5487  //
5488  // This is a special case since the target doesn't really support v2f32s.
5489  SelectionDAG &DAG = DCI.DAG;
5490  SDValue Op0 = N->getOperand(0);
5491  if (N->getValueType(0) == MVT::f32 &&
5492  Op0.hasOneUse() &&
5493  Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5494  Op0.getOperand(0).getValueType() == MVT::v2f64 &&
5495  Op0.getOperand(1).getOpcode() == ISD::Constant &&
5496  cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
5497  SDValue Vec = Op0.getOperand(0);
5498  for (auto *U : Vec->uses()) {
5499  if (U != Op0.getNode() &&
5500  U->hasOneUse() &&
5501  U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5502  U->getOperand(0) == Vec &&
5503  U->getOperand(1).getOpcode() == ISD::Constant &&
5504  cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
5505  SDValue OtherRound = SDValue(*U->use_begin(), 0);
5506  if (OtherRound.getOpcode() == ISD::FP_ROUND &&
5507  OtherRound.getOperand(0) == SDValue(U, 0) &&
5508  OtherRound.getValueType() == MVT::f32) {
5509  SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
5510  MVT::v4f32, Vec);
5511  DCI.AddToWorklist(VRound.getNode());
5512  SDValue Extract1 =
5513  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
5514  VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
5515  DCI.AddToWorklist(Extract1.getNode());
5516  DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
5517  SDValue Extract0 =
5518  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
5519  VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
5520  return Extract0;
5521  }
5522  }
5523  }
5524  }
5525  return SDValue();
5526 }
5527 
5528 SDValue SystemZTargetLowering::combineFP_EXTEND(
5529  SDNode *N, DAGCombinerInfo &DCI) const {
5530  // (fpextend (extract_vector_elt X 0))
5531  // (fpextend (extract_vector_elt X 2)) ->
5532  // (extract_vector_elt (VEXTEND X) 0)
5533  // (extract_vector_elt (VEXTEND X) 1)
5534  //
5535  // This is a special case since the target doesn't really support v2f32s.
5536  SelectionDAG &DAG = DCI.DAG;
5537  SDValue Op0 = N->getOperand(0);
5538  if (N->getValueType(0) == MVT::f64 &&
5539  Op0.hasOneUse() &&
5540  Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5541  Op0.getOperand(0).getValueType() == MVT::v4f32 &&
5542  Op0.getOperand(1).getOpcode() == ISD::Constant &&
5543  cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
5544  SDValue Vec = Op0.getOperand(0);
5545  for (auto *U : Vec->uses()) {
5546  if (U != Op0.getNode() &&
5547  U->hasOneUse() &&
5548  U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5549  U->getOperand(0) == Vec &&
5550  U->getOperand(1).getOpcode() == ISD::Constant &&
5551  cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
5552  SDValue OtherExtend = SDValue(*U->use_begin(), 0);
5553  if (OtherExtend.getOpcode() == ISD::FP_EXTEND &&
5554  OtherExtend.getOperand(0) == SDValue(U, 0) &&
5555  OtherExtend.getValueType() == MVT::f64) {
5556  SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
5557  MVT::v2f64, Vec);
5558  DCI.AddToWorklist(VExtend.getNode());
5559  SDValue Extract1 =
5560  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
5561  VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
5562  DCI.AddToWorklist(Extract1.getNode());
5563  DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
5564  SDValue Extract0 =
5565  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
5566  VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
5567  return Extract0;
5568  }
5569  }
5570  }
5571  }
5572  return SDValue();
5573 }
5574 
5575 SDValue SystemZTargetLowering::combineBSWAP(
5576  SDNode *N, DAGCombinerInfo &DCI) const {
5577  SelectionDAG &DAG = DCI.DAG;
5578  // Combine BSWAP (LOAD) into LRVH/LRV/LRVG
5579  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
5580  N->getOperand(0).hasOneUse() &&
5581  (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
5582  N->getValueType(0) == MVT::i64)) {
5583  SDValue Load = N->getOperand(0);
5584  LoadSDNode *LD = cast<LoadSDNode>(Load);
5585 
5586  // Create the byte-swapping load.
5587  SDValue Ops[] = {
5588  LD->getChain(), // Chain
5589  LD->getBasePtr() // Ptr
5590  };
5591  EVT LoadVT = N->getValueType(0);
5592  if (LoadVT == MVT::i16)
5593  LoadVT = MVT::i32;
5594  SDValue BSLoad =
5595  DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
5596  DAG.getVTList(LoadVT, MVT::Other),
5597  Ops, LD->getMemoryVT(), LD->getMemOperand());
5598 
5599  // If this is an i16 load, insert the truncate.
5600  SDValue ResVal = BSLoad;
5601  if (N->getValueType(0) == MVT::i16)
5602  ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
5603 
5604  // First, combine the bswap away. This makes the value produced by the
5605  // load dead.
5606  DCI.CombineTo(N, ResVal);
5607 
5608  // Next, combine the load away; we give it a bogus result value but a real
5609  // chain result. The result value is dead because the bswap is dead.
5610  DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
5611 
5612  // Return N so it doesn't get rechecked!
5613  return SDValue(N, 0);
5614  }
5615  return SDValue();
5616 }
5617 
5618 static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
5619  // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
5620  // set by the CCReg instruction using the CCValid / CCMask masks.
5621  // If the CCReg instruction is itself an ICMP testing the condition
5622  // code set by some other instruction, see whether we can directly
5623  // use that condition code.
5624 
5625  // Verify that we have an ICMP against some constant.
5626  if (CCValid != SystemZ::CCMASK_ICMP)
5627  return false;
5628  auto *ICmp = CCReg.getNode();
5629  if (ICmp->getOpcode() != SystemZISD::ICMP)
5630  return false;
5631  auto *CompareLHS = ICmp->getOperand(0).getNode();
5632  auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
5633  if (!CompareRHS)
5634  return false;
5635 
5636  // Optimize the case where CompareLHS is a SELECT_CCMASK.
5637  if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
5638  // Verify that we have an appropriate mask for an EQ or NE comparison.
5639  bool Invert = false;
5640  if (CCMask == SystemZ::CCMASK_CMP_NE)
5641  Invert = !Invert;
5642  else if (CCMask != SystemZ::CCMASK_CMP_EQ)
5643  return false;
5644 
5645  // Verify that the ICMP compares against one of the select values.
5646  auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
5647  if (!TrueVal)
5648  return false;
5649  auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
5650  if (!FalseVal)
5651  return false;
5652  if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
5653  Invert = !Invert;
5654  else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
5655  return false;
5656 
5657  // Compute the effective CC mask for the new branch or select.
5658  auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
5659  auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
5660  if (!NewCCValid || !NewCCMask)
5661  return false;
5662  CCValid = NewCCValid->getZExtValue();
5663  CCMask = NewCCMask->getZExtValue();
5664  if (Invert)
5665  CCMask ^= CCValid;
5666 
5667  // Return the updated CCReg link.
5668  CCReg = CompareLHS->getOperand(4);
5669  return true;
5670  }
5671 
5672  // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
5673  if (CompareLHS->getOpcode() == ISD::SRA) {
5674  auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
5675  if (!SRACount || SRACount->getZExtValue() != 30)
5676  return false;
5677  auto *SHL = CompareLHS->getOperand(0).getNode();
5678  if (SHL->getOpcode() != ISD::SHL)
5679  return false;
5680  auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
5681  if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
5682  return false;
5683  auto *IPM = SHL->getOperand(0).getNode();
5684  if (IPM->getOpcode() != SystemZISD::IPM)
5685  return false;
5686 
5687  // Avoid introducing CC spills (because SRA would clobber CC).
5688  if (!CompareLHS->hasOneUse())
5689  return false;
5690  // Verify that the ICMP compares against zero.
5691  if (CompareRHS->getZExtValue() != 0)
5692  return false;
5693 
5694  // Compute the effective CC mask for the new branch or select.
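  // After the SHL/SRA pair the valid CC values 0..2 are observed as 0, 1 and
  // -2, so comparing the result against zero inverts the orderings: e.g.
  // "< 0" holds exactly for CC 2, which is why LT/GT and LE/GE swap below.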
5695  switch (CCMask) {
5696  case SystemZ::CCMASK_CMP_EQ: break;
5697  case SystemZ::CCMASK_CMP_NE: break;
5698  case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break;
5699  case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break;
5700  case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break;
5701  case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break;
5702  default: return false;
5703  }
5704 
5705  // Return the updated CCReg link.
5706  CCReg = IPM->getOperand(0);
5707  return true;
5708  }
5709 
5710  return false;
5711 }
5712 
5713 SDValue SystemZTargetLowering::combineBR_CCMASK(
5714  SDNode *N, DAGCombinerInfo &DCI) const {
5715  SelectionDAG &DAG = DCI.DAG;
5716 
5717  // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
5718  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
5719  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
5720  if (!CCValid || !CCMask)
5721  return SDValue();
5722 
5723  int CCValidVal = CCValid->getZExtValue();
5724  int CCMaskVal = CCMask->getZExtValue();
5725  SDValue Chain = N->getOperand(0);
5726  SDValue CCReg = N->getOperand(4);
5727 
5728  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
5729  return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
5730  Chain,
5731  DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
5732  DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
5733  N->getOperand(3), CCReg);
5734  return SDValue();
5735 }
5736 
5737 SDValue SystemZTargetLowering::combineSELECT_CCMASK(
5738  SDNode *N, DAGCombinerInfo &DCI) const {
5739  SelectionDAG &DAG = DCI.DAG;
5740 
5741  // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
5742  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
5743  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
5744  if (!CCValid || !CCMask)
5745  return SDValue();
5746 
5747  int CCValidVal = CCValid->getZExtValue();
5748  int CCMaskVal = CCMask->getZExtValue();
5749  SDValue CCReg = N->getOperand(4);
5750 
5751  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
5752  return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
5753  N->getOperand(0),
5754  N->getOperand(1),
5755  DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
5756  DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
5757  CCReg);
5758  return SDValue();
5759 }
5760 
5761 
5762 SDValue SystemZTargetLowering::combineGET_CCMASK(
5763  SDNode *N, DAGCombinerInfo &DCI) const {
5764 
5765  // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
5766  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
5767  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
5768  if (!CCValid || !CCMask)
5769  return SDValue();
5770  int CCValidVal = CCValid->getZExtValue();
5771  int CCMaskVal = CCMask->getZExtValue();
5772 
5773  SDValue Select = N->getOperand(0);
5774  if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
5775  return SDValue();
5776 
5777  auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
5778  auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
5779  if (!SelectCCValid || !SelectCCMask)
5780  return SDValue();
5781  int SelectCCValidVal = SelectCCValid->getZExtValue();
5782  int SelectCCMaskVal = SelectCCMask->getZExtValue();
5783 
5784  auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
5785  auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
5786  if (!TrueVal || !FalseVal)
5787  return SDValue();
5788  if (TrueVal->getZExtValue() != 0 && FalseVal->getZExtValue() == 0)
5789  ;
5790  else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() != 0)
5791  SelectCCMaskVal ^= SelectCCValidVal;
5792  else
5793  return SDValue();
5794 
5795  if (SelectCCValidVal & ~CCValidVal)
5796  return SDValue();
5797  if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
5798  return SDValue();
5799 
5800  return Select->getOperand(4);
5801 }
5802 
5803 SDValue SystemZTargetLowering::combineIntDIVREM(
5804  SDNode *N, DAGCombinerInfo &DCI) const {
5805  SelectionDAG &DAG = DCI.DAG;
5806  EVT VT = N->getValueType(0);
5807  // In the case where the divisor is a vector of constants, a cheaper
5808  // sequence of instructions can replace the divide. BuildSDIV is called to
5809  // do this during DAG combining, but it only succeeds when it can build a
5810  // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
5811  // since it is not Legal but Custom it can only happen before
5812  // legalization. Therefore we must scalarize this early, before DAG Combine
5813  // 1. For widened vectors, this is already the result of type legalization.
5814  if (VT.isVector() && isTypeLegal(VT) &&
5815  DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
5816  return DAG.UnrollVectorOp(N);
5817  return SDValue();
5818 }
5819 
5821  DAGCombinerInfo &DCI) const {
5822  switch(N->getOpcode()) {
5823  default: break;
5824  case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
5825  case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
5826  case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
5827  case SystemZISD::MERGE_HIGH:
5828  case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
5829  case ISD::LOAD: return combineLOAD(N, DCI);
5830  case ISD::STORE: return combineSTORE(N, DCI);
5831  case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
5832  case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
5833  case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
5834  case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
5835  case ISD::BSWAP: return combineBSWAP(N, DCI);
5836  case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
5837  case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
5838  case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
5839  case ISD::SDIV:
5840  case ISD::UDIV:
5841  case ISD::SREM:
5842  case ISD::UREM: return combineIntDIVREM(N, DCI);
5843  }
5844 
5845  return SDValue();
5846 }
5847 
5848 // Return the demanded elements for the OpNo source operand of Op. DemandedElts
5849 // are for Op.
5850 static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
5851  unsigned OpNo) {
5852  EVT VT = Op.getValueType();
5853  unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
5854  APInt SrcDemE;
5855  unsigned Opcode = Op.getOpcode();
5856  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
5857  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5858  switch (Id) {
5859  case Intrinsic::s390_vpksh: // PACKS
5860  case Intrinsic::s390_vpksf:
5861  case Intrinsic::s390_vpksg:
5862  case Intrinsic::s390_vpkshs: // PACKS_CC
5863  case Intrinsic::s390_vpksfs:
5864  case Intrinsic::s390_vpksgs:
5865  case Intrinsic::s390_vpklsh: // PACKLS
5866  case Intrinsic::s390_vpklsf:
5867  case Intrinsic::s390_vpklsg:
5868  case Intrinsic::s390_vpklshs: // PACKLS_CC
5869  case Intrinsic::s390_vpklsfs:
5870  case Intrinsic::s390_vpklsgs:
5871  // VECTOR PACK truncates the elements of two source vectors into one.
5872  SrcDemE = DemandedElts;
5873  if (OpNo == 2)
5874  SrcDemE.lshrInPlace(NumElts / 2);
5875  SrcDemE = SrcDemE.trunc(NumElts / 2);
5876  break;
5877  // VECTOR UNPACK extends half the elements of the source vector.
5878  case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
5879  case Intrinsic::s390_vuphh:
5880  case Intrinsic::s390_vuphf:
5881  case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
5882  case Intrinsic::s390_vuplhh:
5883  case Intrinsic::s390_vuplhf:
5884  SrcDemE = APInt(NumElts * 2, 0);
5885  SrcDemE.insertBits(DemandedElts, 0);
5886  break;
5887  case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
5888  case Intrinsic::s390_vuplhw:
5889  case Intrinsic::s390_vuplf:
5890  case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
5891  case Intrinsic::s390_vupllh:
5892  case Intrinsic::s390_vupllf:
5893  SrcDemE = APInt(NumElts * 2, 0);
5894  SrcDemE.insertBits(DemandedElts, NumElts);
5895  break;
5896  case Intrinsic::s390_vpdi: {
5897  // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
5898  SrcDemE = APInt(NumElts, 0);
5899  if (!DemandedElts[OpNo - 1])
5900  break;
5901  unsigned Mask = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
5902  unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
5903  // Demand input element 0 or 1, given by the mask bit value.
5904  SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
5905  break;
5906  }
5907  case Intrinsic::s390_vsldb: {
5908  // VECTOR SHIFT LEFT DOUBLE BY BYTE
5909  assert(VT == MVT::v16i8 && "Unexpected type.");
5910  unsigned FirstIdx = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
5911  assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
5912  unsigned NumSrc0Els = 16 - FirstIdx;
5913  SrcDemE = APInt(NumElts, 0);
5914  if (OpNo == 1) {
5915  APInt DemEls = DemandedElts.trunc(NumSrc0Els);
5916  SrcDemE.insertBits(DemEls, FirstIdx);
5917  } else {
5918  APInt DemEls = DemandedElts.lshr(NumSrc0Els);
5919  SrcDemE.insertBits(DemEls, 0);
5920  }
5921  break;
5922  }
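  // For example, VSLDB with FirstIdx == 12 takes result bytes 0..3 from
  // bytes 12..15 of operand 1 and result bytes 4..15 from bytes 0..11 of
  // operand 2, which is exactly the demanded-element mapping built above.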
5923  case Intrinsic::s390_vperm:
5924  SrcDemE = APInt(NumElts, 1);
5925  break;
5926  default:
5927  llvm_unreachable("Unhandled intrinsic.");
5928  break;
5929  }
5930  } else {
5931  switch (Opcode) {
5932  case SystemZISD::JOIN_DWORDS:
5933  // Scalar operand.
5934  SrcDemE = APInt(1, 1);
5935  break;
5936  case SystemZISD::SELECT_CCMASK:
5937  SrcDemE = DemandedElts;
5938  break;
5939  default:
5940  llvm_unreachable("Unhandled opcode.");
5941  break;
5942  }
5943  }
5944  return SrcDemE;
5945 }
5946 
5947 static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
5948  const APInt &DemandedElts,
5949  const SelectionDAG &DAG, unsigned Depth,
5950  unsigned OpNo) {
5951  APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
5952  APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
5953  KnownBits LHSKnown =
5954  DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
5955  KnownBits RHSKnown =
5956  DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
5957  Known.Zero = LHSKnown.Zero & RHSKnown.Zero;
5958  Known.One = LHSKnown.One & RHSKnown.One;
5959 }
5960 
5961 void
5962 SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
5963  KnownBits &Known,
5964  const APInt &DemandedElts,
5965  const SelectionDAG &DAG,
5966  unsigned Depth) const {
5967  Known.resetAll();
5968 
5969  // Intrinsic CC result is returned in the two low bits.
5970  unsigned tmp0, tmp1; // not used
5971  if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
5972  Known.Zero.setBitsFrom(2);
5973  return;
5974  }
5975  EVT VT = Op.getValueType();
5976  if (Op.getResNo() != 0 || VT == MVT::Untyped)
5977  return;
5978  assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
5979  "KnownBits does not match VT in bitwidth");
5980  assert ((!VT.isVector() ||
5981  (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
5982  "DemandedElts does not match VT number of elements");
5983  unsigned BitWidth = Known.getBitWidth();
5984  unsigned Opcode = Op.getOpcode();
5985  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
5986  bool IsLogical = false;
5987  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5988  switch (Id) {
5989  case Intrinsic::s390_vpksh: // PACKS
5990  case Intrinsic::s390_vpksf:
5991  case Intrinsic::s390_vpksg:
5992  case Intrinsic::s390_vpkshs: // PACKS_CC
5993  case Intrinsic::s390_vpksfs:
5994  case Intrinsic::s390_vpksgs:
5995  case Intrinsic::s390_vpklsh: // PACKLS
5996  case Intrinsic::s390_vpklsf:
5997  case Intrinsic::s390_vpklsg:
5998  case Intrinsic::s390_vpklshs: // PACKLS_CC
5999  case Intrinsic::s390_vpklsfs:
6000  case Intrinsic::s390_vpklsgs:
6001  case Intrinsic::s390_vpdi:
6002  case Intrinsic::s390_vsldb:
6003  case Intrinsic::s390_vperm:
6004  computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
6005  break;
6006  case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
6007  case Intrinsic::s390_vuplhh:
6008  case Intrinsic::s390_vuplhf:
6009  case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
6010  case Intrinsic::s390_vupllh:
6011  case Intrinsic::s390_vupllf:
6012  IsLogical = true;
6013  LLVM_FALLTHROUGH;
6014  case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
6015  case Intrinsic::s390_vuphh:
6016  case Intrinsic::s390_vuphf:
6017  case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
6018  case Intrinsic::s390_vuplhw:
6019  case Intrinsic::s390_vuplf: {
6020  SDValue SrcOp = Op.getOperand(1);
6021  unsigned SrcBitWidth = SrcOp.getScalarValueSizeInBits();
6022  APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
6023  Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
6024  if (IsLogical) {
6025  Known = Known.zext(BitWidth);
6026  Known.Zero.setBitsFrom(SrcBitWidth);
6027  } else
6028  Known = Known.sext(BitWidth);
6029  break;
6030  }
6031  default:
6032  break;
6033  }
6034  } else {
6035  switch (Opcode) {
6036  case SystemZISD::JOIN_DWORDS:
6037  case SystemZISD::SELECT_CCMASK:
6038  computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
6039  break;
6040  case SystemZISD::REPLICATE: {
6041  SDValue SrcOp = Op.getOperand(0);
6042  Known = DAG.computeKnownBits(SrcOp, Depth + 1);
6043  if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
6044  Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
6045  break;
6046  }
6047  default:
6048  break;
6049  }
6050  }
6051 
6052  // Known has the width of the source operand(s). Adjust if needed to match
6053  // the passed bitwidth.
6054  if (Known.getBitWidth() != BitWidth)
6055  Known = Known.zextOrTrunc(BitWidth);
6056 }
6057 
6058 static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
6059  const SelectionDAG &DAG, unsigned Depth,
6060  unsigned OpNo) {
6061  APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
6062  unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
6063  if (LHS == 1) return 1; // Early out.
6064  APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
6065  unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
6066  if (RHS == 1) return 1; // Early out.
6067  unsigned Common = std::min(LHS, RHS);
6068  unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
6069  EVT VT = Op.getValueType();
6070  unsigned VTBits = VT.getScalarSizeInBits();
6071  if (SrcBitWidth > VTBits) { // PACK
6072  unsigned SrcExtraBits = SrcBitWidth - VTBits;
6073  if (Common > SrcExtraBits)
6074  return (Common - SrcExtraBits);
6075  return 1;
6076  }
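  // For example, when packing i32 elements to i16, an operand with 20 known
  // sign bits keeps 20 - 16 = 4 of them in the truncated result.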
6077  assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
6078  return Common;
6079 }
6080 
6081 unsigned
6082 SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
6083  SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
6084  unsigned Depth) const {
6085  if (Op.getResNo() != 0)
6086  return 1;
6087  unsigned Opcode = Op.getOpcode();
6088  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
6089  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
6090  switch (Id) {
6091  case Intrinsic::s390_vpksh: // PACKS
6092  case Intrinsic::s390_vpksf:
6093  case Intrinsic::s390_vpksg:
6094  case Intrinsic::s390_vpkshs: // PACKS_CC
6095  case Intrinsic::s390_vpksfs:
6096  case Intrinsic::s390_vpksgs:
6097  case Intrinsic::s390_vpklsh: // PACKLS
6098  case Intrinsic::s390_vpklsf:
6099  case Intrinsic::s390_vpklsg:
6100  case Intrinsic::s390_vpklshs: // PACKLS_CC
6101  case Intrinsic::s390_vpklsfs:
6102  case Intrinsic::s390_vpklsgs:
6103  case Intrinsic::s390_vpdi:
6104  case Intrinsic::s390_vsldb:
6105  case Intrinsic::s390_vperm:
6106  return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
6107  case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
6108  case Intrinsic::s390_vuphh:
6109  case Intrinsic::s390_vuphf:
6110  case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
6111  case Intrinsic::s390_vuplhw:
6112  case Intrinsic::s390_vuplf: {
6113  SDValue PackedOp = Op.getOperand(1);
6114  APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
6115  unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
6116  EVT VT = Op.getValueType();
6117  unsigned VTBits = VT.getScalarSizeInBits();
6118  Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
6119  return Tmp;
6120  }
6121  default:
6122  break;
6123  }
6124  } else {
6125  switch (Opcode) {
6126  case SystemZISD::SELECT_CCMASK:
6127  return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
6128  default:
6129  break;
6130  }
6131  }
6132 
6133  return 1;
6134 }
6135 
6136 //===----------------------------------------------------------------------===//
6137 // Custom insertion
6138 //===----------------------------------------------------------------------===//
6139 
6140 // Create a new basic block after MBB.
6141 static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
6142  MachineFunction &MF = *MBB->getParent();
6143  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
6144  MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
6145  return NewMBB;
6146 }
6147 
6148 // Split MBB after MI and return the new block (the one that contains
6149 // instructions after MI).
6150 static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
6151  MachineBasicBlock *MBB) {
6152  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
6153  NewMBB->splice(NewMBB->begin(), MBB,
6154  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
6155  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
6156  return NewMBB;
6157 }
6158 
6159 // Split MBB before MI and return the new block (the one that contains MI).
6160 static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
6161  MachineBasicBlock *MBB) {
6162  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
6163  NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
6164  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
6165  return NewMBB;
6166 }
6167 
6168 // Force base value Base into a register before MI. Return the register.
6169 static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
6170  const SystemZInstrInfo *TII) {
6171  if (Base.isReg())
6172  return Base.getReg();
6173 
6174  MachineBasicBlock *MBB = MI.getParent();
6175  MachineFunction &MF = *MBB->getParent();
6176  MachineRegisterInfo &MRI = MF.getRegInfo();
6177 
6178  unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
6179  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
6180  .add(Base)
6181  .addImm(0)
6182  .addReg(0);
6183  return Reg;
6184 }
6185 
6186 // The CC operand of MI might be missing a kill marker because there
6187 // were multiple uses of CC, and ISel didn't know which to mark.
6188 // Figure out whether MI should have had a kill marker.
6189 static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
6190  // Scan forward through BB for a use/def of CC.
6191  MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
6192  for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
6193  const MachineInstr& mi = *miI;
6194  if (mi.readsRegister(SystemZ::CC))
6195  return false;
6196  if (mi.definesRegister(SystemZ::CC))
6197  break; // Should have kill-flag - update below.
6198  }
6199 
6200  // If we hit the end of the block, check whether CC is live into a
6201  // successor.
6202  if (miI == MBB->end()) {
6203  for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI)
6204  if ((*SI)->isLiveIn(SystemZ::CC))
6205  return false;
6206  }
6207 
6208  return true;
6209 }
6210 
6211 // Return true if it is OK for this Select pseudo-opcode to be cascaded
6212 // together with other Select pseudo-opcodes into a single basic-block with
6213 // a conditional jump around it.
6214 static bool isSelectPseudo(MachineInstr &MI) {
6215  switch (MI.getOpcode()) {
6216  case SystemZ::Select32:
6217  case SystemZ::Select64:
6218  case SystemZ::SelectF32:
6219  case SystemZ::SelectF64:
6220  case SystemZ::SelectF128:
6221  case SystemZ::SelectVR32:
6222  case SystemZ::SelectVR64:
6223  case SystemZ::SelectVR128:
6224  return true;
6225 
6226  default:
6227  return false;
6228  }
6229 }
6230 
6231 // Helper function, which inserts PHI functions into SinkMBB:
6232 // %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
6233 // where %FalseValue(i) and %TrueValue(i) are taken from the consequent Selects
6234 // in [MIItBegin, MIItEnd) range.
6235 static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin,
6236  MachineBasicBlock::iterator MIItEnd,
6237  MachineBasicBlock *TrueMBB,
6238  MachineBasicBlock *FalseMBB,
6239  MachineBasicBlock *SinkMBB) {
6240  MachineFunction *MF = TrueMBB->getParent();
6241  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
6242 
6243  unsigned CCValid = MIItBegin->getOperand(3).getImm();
6244  unsigned CCMask = MIItBegin->getOperand(4).getImm();
6245  DebugLoc DL = MIItBegin->getDebugLoc();
6246 
6247  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
6248 
6249  // As we are creating the PHIs, we have to be careful if there is more than
6250  // one. Later Selects may reference the results of earlier Selects, but later
6251  // PHIs have to reference the individual true/false inputs from earlier PHIs.
6252  // That also means that PHI construction must work forward from earlier to
6253  // later, and that the code must maintain a mapping from earlier PHI's
6254  // destination registers, and the registers that went into the PHI.
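 // For example, if a later Select uses an earlier Select's result as its
 // true value, its PHI cannot name that result (it is only defined in
 // SinkMBB); it must instead use the true-side register recorded for the
 // earlier PHI in RegRewriteTable.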
6255  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
6256 
6257  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
6258  unsigned DestReg = MIIt->getOperand(0).getReg();
6259  unsigned TrueReg = MIIt->getOperand(1).getReg();
6260  unsigned FalseReg = MIIt->getOperand(2).getReg();
6261 
6262  // If this Select we are generating is the opposite condition from
6263  // the jump we generated, then we have to swap the operands for the
6264  // PHI that is going to be generated.
6265  if (MIIt->getOperand(4).getImm() == (CCValid ^ CCMask))
6266  std::swap(TrueReg, FalseReg);
6267 
6268  if (RegRewriteTable.find(TrueReg) != RegRewriteTable.end())
6269  TrueReg = RegRewriteTable[TrueReg].first;
6270 
6271  if (RegRewriteTable.find(FalseReg) != RegRewriteTable.end())
6272  FalseReg = RegRewriteTable[FalseReg].second;
6273 
6274  BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
6275  .addReg(TrueReg).addMBB(TrueMBB)
6276  .addReg(FalseReg).addMBB(FalseMBB);
6277 
6278  // Add this PHI to the rewrite table.
6279  RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
6280  }
6281 }
6282 
6283 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
6284 MachineBasicBlock *
6285 SystemZTargetLowering::emitSelect(MachineInstr &MI,
6286  MachineBasicBlock *MBB) const {
6287  const SystemZInstrInfo *TII =
6288  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
6289 
6290  unsigned CCValid = MI.getOperand(3).getImm();
6291  unsigned CCMask = MI.getOperand(4).getImm();
6292  DebugLoc DL = MI.getDebugLoc();
6293 
6294  // If we have a sequence of Select* pseudo instructions using the
6295  // same condition code value, we want to expand all of them into
6296  // a single pair of basic blocks using the same condition.
6297  MachineInstr *LastMI = &MI;
6298  MachineBasicBlock::iterator NextMIIt =
6299  std::next(MachineBasicBlock::iterator(MI));
6300 
6301  if (isSelectPseudo(MI))
6302  while (NextMIIt != MBB->end() && isSelectPseudo(*NextMIIt) &&
6303  NextMIIt->getOperand(3).getImm() == CCValid &&
6304  (NextMIIt->getOperand(4).getImm() == CCMask ||
6305  NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask))) {
6306  LastMI = &*NextMIIt;
6307  ++NextMIIt;
6308  }
6309 
6310  MachineBasicBlock *StartMBB = MBB;
6311  MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
6312  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
6313 
6314  // Unless CC was killed in the last Select instruction, mark it as
6315  // live-in to both FalseMBB and JoinMBB.
6316  if (!LastMI->killsRegister(SystemZ::CC) && !checkCCKill(*LastMI, JoinMBB)) {
6317  FalseMBB->addLiveIn(SystemZ::CC);
6318  JoinMBB->addLiveIn(SystemZ::CC);
6319  }
6320 
6321  // StartMBB:
6322  // BRC CCMask, JoinMBB
6323  // # fallthrough to FalseMBB
6324  MBB = StartMBB;
6325  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
6326  .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
6327  MBB->addSuccessor(JoinMBB);
6328  MBB->addSuccessor(FalseMBB);
6329 
6330  // FalseMBB:
6331  // # fallthrough to JoinMBB
6332  MBB = FalseMBB;
6333  MBB->addSuccessor(JoinMBB);
6334 
6335  // JoinMBB:
6336  // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
6337  // ...
6338  MBB = JoinMBB;
6339  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
6340  MachineBasicBlock::iterator MIItEnd =
6341  std::next(MachineBasicBlock::iterator(LastMI));
6342  createPHIsForSelects(MIItBegin, MIItEnd, StartMBB, FalseMBB, MBB);
6343 
6344  StartMBB->erase(MIItBegin, MIItEnd);
6345  return JoinMBB;
6346 }
6347 
6348 // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
6349 // StoreOpcode is the store to use and Invert says whether the store should
6350 // happen when the condition is false rather than true. If a STORE ON
6351 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
6352 MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
6353  MachineBasicBlock *MBB,
6354  unsigned StoreOpcode,
6355  unsigned STOCOpcode,
6356  bool Invert) const {
6357  const SystemZInstrInfo *TII =
6358  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
6359 
6360  unsigned SrcReg = MI.getOperand(0).getReg();
6361  MachineOperand Base = MI.getOperand(1);
6362  int64_t Disp = MI.getOperand(2).getImm();
6363  unsigned IndexReg = MI.getOperand(3).getReg();
6364  unsigned CCValid = MI.getOperand(4).getImm();
6365  unsigned CCMask = MI.getOperand(5).getImm();
6366  DebugLoc DL = MI.getDebugLoc();
6367 
6368  StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
6369 
6370  // Use STOCOpcode if possible. We could use different store patterns in
6371  // order to avoid matching the index register, but the performance trade-offs
6372  // might be more complicated in that case.
6373  if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
6374  if (Invert)
6375  CCMask ^= CCValid;
6376 
6377  // ISel pattern matching also adds a load memory operand of the same
6378  // address, so take special care to find the storing memory operand.
6379  MachineMemOperand *MMO = nullptr;
6380  for (auto *I : MI.memoperands())
6381  if (I->isStore()) {
6382  MMO = I;
6383  break;
6384  }
6385 
6386  BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
6387  .addReg(SrcReg)
6388  .add(Base)
6389  .addImm(Disp)
6390  .addImm(CCValid)
6391  .addImm(CCMask)
6392  .addMemOperand(MMO);
6393 
6394  MI.eraseFromParent();
6395  return MBB;
6396  }
6397 
6398  // Get the condition needed to branch around the store.
6399  if (!Invert)
6400  CCMask ^= CCValid;
6401 
6402  MachineBasicBlock *StartMBB = MBB;
6403  MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
6404  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
6405 
6406  // Unless CC was killed in the CondStore instruction, mark it as
6407  // live-in to both FalseMBB and JoinMBB.
6408  if (!MI.killsRegister(SystemZ::CC) && !checkCCKill(MI, JoinMBB)) {
6409  FalseMBB->addLiveIn(SystemZ::CC);
6410  JoinMBB->addLiveIn(SystemZ::CC);
6411  }
6412 
6413  // StartMBB:
6414  // BRC CCMask, JoinMBB
6415  // # fallthrough to FalseMBB
6416  MBB = StartMBB;
6417  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
6418  .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
6419  MBB->addSuccessor(JoinMBB);
6420  MBB->addSuccessor(FalseMBB);
6421 
6422  // FalseMBB:
6423  // store %SrcReg, %Disp(%Index,%Base)
6424  // # fallthrough to JoinMBB
6425  MBB = FalseMBB;
6426  BuildMI(MBB, DL, TII->get(StoreOpcode))
6427  .addReg(SrcReg)
6428  .add(Base)
6429  .addImm(Disp)
6430  .addReg(IndexReg);
6431  MBB->addSuccessor(JoinMBB);
6432 
6433  MI.eraseFromParent();
6434  return JoinMBB;
6435 }
6436 
6437 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
6438 // or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that
6439 // performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
6440 // BitSize is the width of the field in bits, or 0 if this is a partword
6441 // ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
6442 // is one of the operands. Invert says whether the field should be
6443 // inverted after performing BinOpcode (e.g. for NAND).
6444 MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
6445  MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
6446  unsigned BitSize, bool Invert) const {
6447  MachineFunction &MF = *MBB->getParent();
6448  const SystemZInstrInfo *TII =
6449  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
6450  MachineRegisterInfo &MRI = MF.getRegInfo();
6451  bool IsSubWord = (BitSize < 32);
6452 
6453  // Extract the operands. Base can be a register or a frame index.
6454  // Src2 can be a register or immediate.
6455  unsigned Dest = MI.getOperand(0).getReg();
6456  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
6457  int64_t Disp = MI.getOperand(2).getImm();
6458  MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
6459  unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
6460  unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
6461  DebugLoc DL = MI.getDebugLoc();
6462  if (IsSubWord)
6463  BitSize = MI.getOperand(6).getImm();
6464 
6465  // Subword operations use 32-bit registers.
6466  const TargetRegisterClass *RC = (BitSize <= 32 ?
6467  &SystemZ::GR32BitRegClass :
6468  &SystemZ::GR64BitRegClass);
6469  unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
6470  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
6471 
6472  // Get the right opcodes for the displacement.
6473  LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
6474  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
6475  assert(LOpcode && CSOpcode && "Displacement out of range");
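  // getOpcodeForOffset switches to the long-displacement form (e.g. LY or
  // CSY) when Disp does not fit in a 12-bit unsigned displacement field.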
6476 
6477  // Create virtual registers for temporary results.
6478  unsigned OrigVal = MRI.createVirtualRegister(RC);
6479  unsigned OldVal = MRI.createVirtualRegister(RC);
6480  unsigned NewVal = (BinOpcode || IsSubWord ?
6481  MRI.createVirtualRegister(RC) : Src2.getReg());
6482  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
6483  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
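  // For subword operations the field is first rotated to the high end of
  // the containing 32-bit word, updated there, and rotated back before the
  // compare-and-swap retry loop stores it.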
6484 
6485  // Insert a basic block for the main loop.
6486  MachineBasicBlock *StartMBB = MBB;
6487  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
6488  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
6489 
6490  // StartMBB:
6491  // ...
6492  // %OrigVal = L Disp(%Base)
6493  //   # fall through to LoopMBB
6494  MBB = StartMBB;
6495  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
6496  MBB->addSuccessor(LoopMBB);
6497 
6498  // LoopMBB:
6499  // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
6500  // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
6501  // %RotatedNewVal = OP %RotatedOldVal, %Src2
6502  // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
6503  // %Dest = CS %OldVal, %NewVal, Disp(%Base)
6504  // JNE LoopMBB
6505  //   # fall through to DoneMBB
6506  MBB = LoopMBB;
6507  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
6508  .addReg(OrigVal).addMBB(StartMBB)
6509  .addReg(Dest).addMBB(LoopMBB);
6510  if (IsSubWord)
6511  BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
6512  .addReg(OldVal).addReg(BitShift).addImm(0);
6513  if (Invert) {
6514  // Perform the operation normally and then invert every bit of the field.
6515  unsigned Tmp = MRI.createVirtualRegister(RC);
6516  BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
6517  if (BitSize <= 32)
6518  // XILF with a mask of BitSize one bits at the top of the word
      // (-1U << (32 - BitSize)), which is where the rotated field sits.
6519  BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
6520  .addReg(Tmp).addImm(-1U << (32 - BitSize));
6521  else {
6522  // Use LCGR and add -1 to the result, which is more compact than
6523  // an XILF, XILH pair.
6524  unsigned Tmp2 = MRI.createVirtualRegister(RC);
6525  BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
6526  BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
6527  .addReg(Tmp2).addImm(-1);
6528  }
6529  } else if (BinOpcode)
6530  // A simple binary operation.
6531  BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
6532  .addReg(RotatedOldVal)
6533  .add(Src2);
6534  else if (IsSubWord)
6535  // Use RISBG to rotate Src2 into position and use it to replace the
6536  // field in RotatedOldVal.
6537  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
6538  .addReg(RotatedOldVal).addReg(Src2.getReg())
6539  .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
6540  if (IsSubWord)
6541  BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
6542  .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
6543  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
6544  .addReg(OldVal)
6545  .addReg(NewVal)
6546  .add(Base)
6547  .addImm(Disp);
6548  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
6549    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
6550  MBB->addSuccessor(LoopMBB);
6551  MBB->addSuccessor(DoneMBB);
6552 
6553  MI.eraseFromParent();
6554  return DoneMBB;
6555 }
6556 
6557 // Implement EmitInstrWithCustomInserter for pseudo
6558 // ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
6559 // instruction that should be used to compare the current field with the
6560 // minimum or maximum value. KeepOldMask is the BRC condition-code mask
6561 // for when the current field should be kept. BitSize is the width of
6562 // the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
6563 MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
6564  MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
6565  unsigned KeepOldMask, unsigned BitSize) const {
6566  MachineFunction &MF = *MBB->getParent();
6567  const SystemZInstrInfo *TII =
6568  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
6569  MachineRegisterInfo &MRI = MF.getRegInfo();
6570  bool IsSubWord = (BitSize < 32);
6571 
6572  // Extract the operands. Base can be a register or a frame index.
6573  unsigned Dest = MI.getOperand(0).getReg();
6574  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
6575  int64_t Disp = MI.getOperand(2).getImm();
6576  unsigned Src2 = MI.getOperand(3).getReg();
6577  unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
6578  unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
6579  DebugLoc DL = MI.getDebugLoc();
6580  if (IsSubWord)
6581  BitSize = MI.getOperand(6).getImm();
6582 
6583  // Subword operations use 32-bit registers.
6584  const TargetRegisterClass *RC = (BitSize <= 32 ?
6585  &SystemZ::GR32BitRegClass :
6586  &SystemZ::GR64BitRegClass);
6587  unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
6588  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
6589 
6590  // Get the right opcodes for the displacement.
6591  LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
6592  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
6593  assert(LOpcode && CSOpcode && "Displacement out of range");
6594 
6595  // Create virtual registers for temporary results.
6596  unsigned OrigVal = MRI.createVirtualRegister(RC);
6597  unsigned OldVal = MRI.createVirtualRegister(RC);
6598  unsigned NewVal = MRI.createVirtualRegister(RC);
6599  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
6600  unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
6601  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
6602 
6603  // Insert 3 basic blocks for the loop.
6604  MachineBasicBlock *StartMBB = MBB;
6605  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
6606  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
6607  MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
6608  MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
6609 
6610  // StartMBB:
6611  // ...
6612  // %OrigVal = L Disp(%Base)
6613  //   # fall through to LoopMBB
6614  MBB = StartMBB;
6615  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
6616  MBB->addSuccessor(LoopMBB);
6617 
6618  // LoopMBB:
6619  // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
6620  // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
6621  // CompareOpcode %RotatedOldVal, %Src2
6622  // BRC KeepOldMask, UpdateMBB
6623  MBB = LoopMBB;
6624  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
6625  .addReg(OrigVal).addMBB(StartMBB)
6626  .addReg(Dest).addMBB(UpdateMBB);
6627  if (IsSubWord)
6628  BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
6629  .addReg(OldVal).addReg(BitShift).addImm(0);
6630  BuildMI(MBB, DL, TII->get(CompareOpcode))
6631  .addReg(RotatedOldVal).addReg(Src2);
6632  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
6633  .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
6634  MBB->addSuccessor(UpdateMBB);
6635  MBB->addSuccessor(UseAltMBB);
6636 
6637  // UseAltMBB:
6638  // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
6639  //   # fall through to UpdateMBB
6640  MBB = UseAltMBB;
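  // RISBG immediates are first bit, last bit and rotate count: bits
  // 32 .. 31 + BitSize of Src2 (rotate 0) replace the high field of
  // RotatedOldVal, leaving the rest of the word unchanged.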
6641  if (IsSubWord)
6642  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
6643  .addReg(RotatedOldVal).addReg(Src2)
6644  .addImm(32).addImm(31 + BitSize).addImm(0);
6645  MBB->addSuccessor(UpdateMBB);
6646 
6647  // UpdateMBB:
6648  // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
6649  // [ %RotatedAltVal, UseAltMBB ]
6650  // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
6651  // %Dest = CS %OldVal, %NewVal, Disp(%Base)
6652  // JNE LoopMBB
6653  //   # fall through to DoneMBB
6654  MBB = UpdateMBB;
6655  BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
6656  .addReg(RotatedOldVal).addMBB(LoopMBB)
6657  .addReg(RotatedAltVal).addMBB(UseAltMBB);
6658  if (IsSubWord)
6659  BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
6660  .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
6661  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
6662  .addReg(OldVal)
6663  .addReg(NewVal)
6664  .add(Base)
6665  .addImm(Disp);
6666  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
6667    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
6668  MBB->addSuccessor(LoopMBB);
6669  MBB->addSuccessor(DoneMBB);
6670 
6671  MI.eraseFromParent();
6672  return DoneMBB;
6673 }
6674 
6675 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
6676 // instruction MI.
6677 MachineBasicBlock *
6678 SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
6679  MachineBasicBlock *MBB) const {
6680 
6681  MachineFunction &MF = *MBB->getParent();
6682  const SystemZInstrInfo *TII =
6683  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
6684  MachineRegisterInfo &MRI = MF.getRegInfo();
6685 
6686  // Extract the operands. Base can be a register or a frame index.
6687  unsigned Dest = MI.getOperand(0).getReg();
6688  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
6689  int64_t Disp = MI.getOperand(2).getImm();
6690  unsigned OrigCmpVal = MI.getOperand(3).getReg();
6691  unsigned OrigSwapVal = MI.getOperand(4).getReg();
6692  unsigned BitShift = MI.getOperand(5).getReg();
6693  unsigned NegBitShift = MI.getOperand(6).getReg();
6694  int64_t BitSize = MI.getOperand(7).getImm();
6695  DebugLoc DL = MI.getDebugLoc();
6696 
6697  const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
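  // The partword compare-and-swap always operates on the full 32-bit word
  // that contains the field, so all temporaries are GR32s.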
6698 
6699  // Get the right opcodes for the displacement.
6700  unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
6701  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
6702  assert(LOpcode && CSOpcode && "Displacement out of range");
6703 
6704  // Create virtual registers for temporary results.
6705  unsigned OrigOldVal = MRI.createVirtualRegister(RC);
6706  unsigned OldVal = MRI.createVirtualRegister(RC);
6707  unsigned CmpVal = MRI.createVirtualRegister(RC);
6708  unsigned SwapVal = MRI.createVirtualRegister(RC);
6709  unsigned StoreVal = MRI.createVirtualRegister(RC);
6710  unsigned RetryOldVal = MRI.createVirtualRegister(RC);
6711  unsigned RetryCmpVal = MRI.createVirtualRegister(RC);
6712  unsigned RetrySwapVal = MRI.createVirtualRegister(RC);
6713 
6714  // Insert 2 basic blocks for the loop.
6715  MachineBasicBlock *StartMBB = MBB;
6716  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
6717  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
6718  MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);
6719 
6720  // StartMBB:
6721  // ...
6722  // %OrigOldVal = L Disp(%Base)
6723  //   # fall through to LoopMBB
6724  MBB = StartMBB;
6725  BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
6726  .add(Base)
6727  .addImm(Disp)
6728  .addReg(0);
6729  MBB->addSuccessor(LoopMBB);
6730 
6731  // LoopMBB:
6732  // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
6733  // %CmpVal = phi [ %OrigCmpVal, EntryBB ], [ %RetryCmpVal, SetMBB ]
6734  // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
6735  // %Dest = RLL %OldVal, BitSize(%BitShift)
6736  // ^^ The low BitSize bits contain the field
6737  // of interest.
6738  // %RetryCmpVal = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
6739  // ^^ Replace the upper 32-BitSize bits of the
6740  // comparison value with those that we loaded,
6741  // so that we can use a full word comparison.
6742  // CR %Dest, %RetryCmpVal
6743  // JNE DoneMBB
6744  // # Fall through to SetMBB
6745  MBB = LoopMBB;
6746  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
6747  .addReg(OrigOldVal).addMBB(StartMBB)
6748  .addReg(RetryOldVal).addMBB(SetMBB);
6749  BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal)
6750  .addReg(OrigCmpVal).addMBB(StartMBB)
6751  .addReg(RetryCmpVal).addMBB(SetMBB);
6752  BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
6753  .addReg(OrigSwapVal).addMBB(StartMBB)
6754  .addReg(RetrySwapVal).addMBB(SetMBB);
6755  BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest)
6756  .addReg(OldVal).addReg(BitShift).addImm(BitSize);
6757  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
6758  .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
6759  BuildMI(MBB, DL, TII->get(SystemZ::CR))
6760  .addReg(Dest).addReg(RetryCmpVal);
6761  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
6762    .addImm(SystemZ::CCMASK_ICMP)
6763    .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
6764  MBB->addSuccessor(DoneMBB);
6765  MBB->addSuccessor(SetMBB);
6766 
6767  // SetMBB:
6768  // %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
6769  // ^^ Replace the upper 32-BitSize bits of the new
6770  // value with those that we loaded.
6771  // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
6772  // ^^ Rotate the new field to its proper position.
6773  // %RetryOldVal = CS %Dest, %StoreVal, Disp(%Base)
6774  // JNE LoopMBB
6775  //   # fall through to DoneMBB
6776  MBB = SetMBB;
6777  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
6778  .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
6779  BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
6780  .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
6781  BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
6782  .addReg(OldVal)
6783  .addReg(StoreVal)
6784  .add(Base)
6785  .addImm(Disp);
6786  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
6787    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
6788  MBB->addSuccessor(LoopMBB);
6789  MBB->addSuccessor(DoneMBB);
6790 
6791  // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
6792  // to the block after the loop. At this point, CC may have been defined
6793  // either by the CR in LoopMBB or by the CS in SetMBB.
6794  if (!MI.registerDefIsDead(SystemZ::CC))
6795  DoneMBB->addLiveIn(SystemZ::CC);
6796 
6797  MI.eraseFromParent();
6798  return DoneMBB;
6799 }
6800 
6801 // Emit a move from two GR64s to a GR128.
6802 MachineBasicBlock *
6803 SystemZTargetLowering::emitPair128(MachineInstr &MI,
6804  MachineBasicBlock *MBB) const {
6805  MachineFunction &MF = *MBB->getParent();
6806  const SystemZInstrInfo *TII =
6807  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
6808  MachineRegisterInfo &MRI = MF.getRegInfo();
6809  DebugLoc DL = MI.getDebugLoc();
6810 
6811  unsigned Dest = MI.getOperand(0).getReg();
6812  unsigned Hi = MI.getOperand(1).getReg();
6813  unsigned Lo = MI.getOperand(2).getReg();
6814  unsigned Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
6815  unsigned Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
6816 
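  // Compose the 128-bit value by inserting Hi and Lo into the two 64-bit
  // subregisters of an IMPLICIT_DEF placeholder.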
6817  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
6818  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
6819  .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64);
6820  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
6821  .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64);
6822 
6823  MI.eraseFromParent();
6824  return MBB;
6825 }
6826 
6827 // Emit an extension from a GR64 to a GR128.  ClearEven is true
6828 // if the high register of the GR128 value must be cleared, or false
6829 // if it is a don't-care.
6830 MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
6831  MachineBasicBlock *MBB,
6832  bool ClearEven) const {
6833  MachineFunction &MF = *MBB->getParent();
6834  const SystemZInstrInfo *TII =
6835  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
6836  MachineRegisterInfo &MRI = MF.getRegInfo();
6837  DebugLoc DL = MI.getDebugLoc();
6838 
6839  unsigned Dest = MI.getOperand(0).getReg();
6840  unsigned Src = MI.getOperand(1).getReg();
6841  unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
6842 
6843  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
6844  if (ClearEven) {
6845  unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
6846  unsigned Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
6847 
6848  BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
6849  .addImm(0);
6850  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
6851  .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
6852  In128 = NewIn128;
6853  }
6854  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
6855  .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
6856 
6857  MI.eraseFromParent();
6858  return MBB;
6859 }
6860 
6861 MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
6862  MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
6863  MachineFunction &MF = *MBB->getParent();
6864  const SystemZInstrInfo *TII =
6865  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
6866  MachineRegisterInfo &MRI = MF.getRegInfo();
6867  DebugLoc DL = MI.getDebugLoc();
6868 
6869  MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
6870  uint64_t DestDisp = MI.getOperand(1).getImm();
6871  MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
6872  uint64_t SrcDisp = MI.getOperand(3).getImm();
6873  uint64_t Length = MI.getOperand(4).getImm();
6874 
6875  // When generating more than one CLC, all but the last will need to
6876  // branch to the end when a difference is found.
6877  MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
6878  splitBlockAfter(MI, MBB) : nullptr);
6879 
6880  // Check for the loop form, in which operand 5 is the trip count.
6881  if (MI.getNumExplicitOperands() > 5) {
6882  bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
6883 
6884  uint64_t StartCountReg = MI.getOperand(5).getReg();
6885  uint64_t StartSrcReg = forceReg(MI, SrcBase, TII);
6886  uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg :
6887  forceReg(MI, DestBase, TII));
6888 
6889  const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
6890  uint64_t ThisSrcReg = MRI.createVirtualRegister(RC);
6891  uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg :
6892  MRI.createVirtualRegister(RC));
6893  uint64_t NextSrcReg = MRI.createVirtualRegister(RC);
6894  uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg :
6895  MRI.createVirtualRegister(RC));
6896 
6897  RC = &SystemZ::GR64BitRegClass;
6898  uint64_t ThisCountReg = MRI.createVirtualRegister(RC);
6899  uint64_t NextCountReg = MRI.createVirtualRegister(RC);
6900 
6901  MachineBasicBlock *StartMBB = MBB;
6902  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
6903  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
6904  MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);
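  // For CLC the loop body can branch out early on inequality, so the
  // pointer and count updates need their own NextMBB; for MVC they are
  // emitted into LoopMBB itself.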
6905 
6906  // StartMBB:
6907  //   # fall through to LoopMBB
6908  MBB->addSuccessor(LoopMBB);
6909 
6910  // LoopMBB:
6911  // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
6912  // [ %NextDestReg, NextMBB ]
6913  // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
6914  // [ %NextSrcReg, NextMBB ]
6915  // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
6916  // [ %NextCountReg, NextMBB ]
6917  // ( PFD 2, 768+DestDisp(%ThisDestReg) )
6918  // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
6919  // ( JLH EndMBB )
6920  //
6921  // The prefetch is used only for MVC. The JLH is used only for CLC.
6922  MBB = LoopMBB;
6923 
6924  BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
6925  .addReg(StartDestReg).addMBB(StartMBB)
6926  .addReg(NextDestReg).addMBB(NextMBB);
6927  if (!HaveSingleBase)
6928  BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
6929  .addReg(StartSrcReg).addMBB(StartMBB)
6930  .addReg(NextSrcReg).addMBB(NextMBB);
6931  BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
6932  .addReg(StartCountReg).addMBB(StartMBB)
6933  .addReg(NextCountReg).addMBB(NextMBB);
6934  if (Opcode == SystemZ::MVC)
6935  BuildMI(MBB, DL, TII->get(SystemZ::PFD))
6936  .addImm(SystemZ::PFD_WRITE)
6937  .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0);
6938  BuildMI(MBB, DL, TII->get(Opcode))
6939  .addReg(ThisDestReg).addImm(DestDisp).addImm(256)
6940  .addReg(ThisSrcReg).addImm(SrcDisp);
6941  if (EndMBB) {
6942  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
6943    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
6944    .addMBB(EndMBB);
6945  MBB->addSuccessor(EndMBB);
6946  MBB->addSuccessor(NextMBB);
6947  }
6948 
6949  // NextMBB:
6950  // %NextDestReg = LA 256(%ThisDestReg)
6951  // %NextSrcReg = LA 256(%ThisSrcReg)
6952  // %NextCountReg = AGHI %ThisCountReg, -1
6953  // CGHI %NextCountReg, 0
6954  // JLH LoopMBB
6955  //   # fall through to DoneMBB
6956  //
6957  // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
6958  MBB = NextMBB;
6959 
6960  BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
6961  .addReg(ThisDestReg).addImm(256).addReg(0);
6962  if (!HaveSingleBase)
6963  BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
6964  .addReg(ThisSrcReg).addImm(256).addReg(0);
6965  BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
6966  .addReg(ThisCountReg).addImm(-1);
6967  BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
6968  .addReg(NextCountReg).addImm(0);
6969  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
6970    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
6971    .addMBB(LoopMBB);
6972  MBB->addSuccessor(LoopMBB);
6973  MBB->addSuccessor(DoneMBB);
6974 
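  // At most 255 bytes (Length mod 256) remain after the loop, addressed
  // relative to the final NextDestReg/NextSrcReg values.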
6975  DestBase = MachineOperand::CreateReg(NextDestReg, false);
6976  SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
6977  Length &= 255;
6978  if (EndMBB && !Length)
6979  // If the loop handled the whole CLC range, DoneMBB will be empty with
6980  // CC live-through into EndMBB, so add it as live-in.
6981  DoneMBB->addLiveIn(SystemZ::CC);
6982  MBB = DoneMBB;
6983  }
6984  // Handle any remaining bytes with straight-line code.
6985  while (Length > 0) {
6986  uint64_t ThisLength = std::min(Length, uint64_t(256));
6987  // The previous iteration might have created out-of-range displacements.
6988  // Apply them using LAY if so.
6989  if (!isUInt<12>(DestDisp)) {
6990  unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
6991  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
6992  .add(DestBase)
6993  .addImm(DestDisp)
6994  .addReg(0);
6995  DestBase = MachineOperand::CreateReg(Reg, false);
6996  DestDisp = 0;
6997  }
6998  if (!isUInt<12>(SrcDisp)) {
6999  unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
7000  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
7001  .add(SrcBase)
7002  .addImm(SrcDisp)
7003  .addReg(0);
7004  SrcBase = MachineOperand::CreateReg(Reg, false);
7005  SrcDisp = 0;
7006  }
7007  BuildMI(*MBB, MI, DL, TII->get(Opcode))
7008  .add(DestBase)
7009  .addImm(DestDisp)
7010  .addImm(ThisLength)
7011  .add(SrcBase)
7012  .addImm(SrcDisp)
7013  .setMemRefs(MI.memoperands());
7014  DestDisp += ThisLength;
7015  SrcDisp += ThisLength;
7016  Length -= ThisLength;
7017  // If there's another CLC to go, branch to the end if a difference
7018  // was found.
7019  if (EndMBB && Length > 0) {
7020  MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
7021  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7022    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
7023    .addMBB(EndMBB);
7024  MBB->addSuccessor(EndMBB);
7025  MBB->addSuccessor(NextMBB);
7026  MBB = NextMBB;
7027  }
7028  }
7029  if (EndMBB) {
7030  MBB->addSuccessor(EndMBB);
7031  MBB = EndMBB;
7032  MBB->addLiveIn(SystemZ::CC);
7033  }
7034 
7035  MI.eraseFromParent();
7036  return MBB;
7037 }
7038 
7039 // Decompose string pseudo-instruction MI into a loop that continually performs
7040 // Opcode until CC != 3.
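// CC 3 means the instruction stopped after processing a CPU-determined
// number of bytes and must be re-executed to continue.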
7041 MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
7042  MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
7043  MachineFunction &MF = *MBB->getParent();
7044  const SystemZInstrInfo *TII =
7045  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
7046  MachineRegisterInfo &MRI = MF.getRegInfo();
7047  DebugLoc DL = MI.getDebugLoc();
7048 
7049  uint64_t End1Reg = MI.getOperand(0).getReg();
7050  uint64_t Start1Reg = MI.getOperand(1).getReg();
7051  uint64_t Start2Reg = MI.getOperand(2).getReg();
7052  uint64_t CharReg = MI.getOperand(3).getReg();
7053 
7054  const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
7055  uint64_t This1Reg = MRI.createVirtualRegister(RC);
7056  uint64_t This2Reg = MRI.createVirtualRegister(RC);
7057  uint64_t End2Reg = MRI.createVirtualRegister(RC);
7058 
7059  MachineBasicBlock *StartMBB = MBB;
7060  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
7061  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
7062 
7063  // StartMBB:
7064  //   # fall through to LoopMBB
7065  MBB->addSuccessor(LoopMBB);
7066 
7067  // LoopMBB:
7068  // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
7069  // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
7070  // R0L = %CharReg
7071  // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
7072  // JO LoopMBB
7073  //   # fall through to DoneMBB
7074  //
7075  // The load of R0L can be hoisted by post-RA LICM.
7076  MBB = LoopMBB;
7077 
7078  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
7079  .addReg(Start1Reg).addMBB(StartMBB)
7080  .addReg(End1Reg).addMBB(LoopMBB);
7081  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
7082  .addReg(Start2Reg).addMBB(StartMBB)
7083  .addReg(End2Reg).addMBB(LoopMBB);
7084  BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
7085  BuildMI(MBB, DL, TII->get(Opcode))
7086  .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
7087  .addReg(This1Reg).addReg(This2Reg);
7088  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7089    .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
7090  MBB->addSuccessor(LoopMBB);
7091  MBB->addSuccessor(DoneMBB);
7092 
7093  DoneMBB->addLiveIn(SystemZ::CC);
7094 
7095  MI.eraseFromParent();
7096  return DoneMBB;
7097 }
7098 
7099 // Update TBEGIN instruction with final opcode and register clobbers.
7100 MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
7101  MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
7102  bool NoFloat) const {
7103  MachineFunction &MF = *MBB->getParent();
7104  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
7105  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
7106 
7107  // Update opcode.
7108  MI.setDesc(TII->get(Opcode));
7109 
7110  // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
7111  // Make sure to add the corresponding GRSM bits if they are missing.
7112  uint64_t Control = MI.getOperand(2).getImm();
7113  static const unsigned GPRControlBit[16] = {
7114  0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
7115  0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
7116  };
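  // Adjacent entries are identical because each even/odd GPR pair is
  // covered by a single bit in TBEGIN's general register save mask.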
7117  Control |= GPRControlBit[15];
7118  if (TFI->hasFP(MF))
7119  Control |= GPRControlBit[11];
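  // %r15 is the stack pointer and %r11 the frame pointer in the SystemZ
  // ELF ABI, hence indices 15 and 11 above.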
7120  MI.getOperand(2).setImm(Control);
7121 
7122  // Add GPR clobbers.
7123  for (int I = 0; I < 16; I++) {
7124  if ((Control & GPRControlBit[I]) == 0) {
7125  unsigned Reg = SystemZMC::GR64Regs[I];
7126  MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
7127  }
7128  }
7129 
7130  // Add FPR/VR clobbers.
7131  if (!NoFloat && (Control & 4) != 0) {
7132  if (Subtarget.hasVector()) {
7133  for (int I = 0; I < 32; I++) {
7134  unsigned Reg = SystemZMC::VR128Regs[I];
7135  MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
7136  }
7137  } else {
7138  for (int I = 0; I < 16; I++) {
7139  unsigned Reg = SystemZMC::FP64Regs[I];
7140  MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
7141  }
7142  }
7143  }
7144 
7145  return MBB;
7146 }
7147 
7148 MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
7149  MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
7150  MachineFunction &MF = *MBB->getParent();
7151  MachineRegisterInfo *MRI = &MF.getRegInfo();
7152  const SystemZInstrInfo *TII =
7153  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
7154  DebugLoc DL = MI.getDebugLoc();
7155 
7156  unsigned SrcReg = MI.getOperand(0).getReg();
7157 
7158  // Create new virtual register of the same class as source.
7159  const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
7160  unsigned DstReg = MRI->createVirtualRegister(RC);
7161 
7162  // Replace pseudo with a normal load-and-test that models the def as
7163  // well.
7164  BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
7165  .addReg(SrcReg);
7166  MI.eraseFromParent();
7167 
7168  return MBB;
7169 }
7170 
7171 MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
7172     MachineInstr &MI, MachineBasicBlock *MBB) const {
7173  switch (MI.getOpcode()) {
7174  case SystemZ::Select32:
7175  case SystemZ::Select64:
7176  case SystemZ::SelectF32:
7177  case SystemZ::SelectF64:
7178  case SystemZ::SelectF128:
7179  case SystemZ::SelectVR32:
7180  case SystemZ::SelectVR64:
7181  case SystemZ::SelectVR128:
7182  return emitSelect(MI, MBB);
7183 
7184  case SystemZ::CondStore8Mux:
7185  return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
7186  case SystemZ::CondStore8MuxInv:
7187  return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
7188  case SystemZ::CondStore16Mux:
7189  return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
7190  case SystemZ::CondStore16MuxInv:
7191  return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
7192  case SystemZ::CondStore32Mux:
7193  return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
7194  case SystemZ::CondStore32MuxInv:
7195  return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
7196  case SystemZ::CondStore8:
7197  return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
7198  case SystemZ::CondStore8Inv:
7199  return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
7200  case SystemZ::CondStore16:
7201  return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
7202  case SystemZ::CondStore16Inv:
7203  return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
7204  case SystemZ::CondStore32:
7205  return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
7206  case SystemZ::CondStore32Inv:
7207  return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
7208  case SystemZ::CondStore64:
7209  return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
7210  case SystemZ::CondStore64Inv:
7211  return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
7212  case SystemZ::CondStoreF32:
7213  return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
7214  case SystemZ::CondStoreF32Inv:
7215  return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
7216  case SystemZ::CondStoreF64:
7217  return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
7218  case SystemZ::CondStoreF64Inv:
7219  return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
7220 
7221  case SystemZ::PAIR128:
7222  return emitPair128(MI, MBB);
7223  case SystemZ::AEXT128:
7224  return emitExt128(MI, MBB, false);
7225  case SystemZ::ZEXT128:
7226  return emitExt128(MI, MBB, true);
7227 
7228  case SystemZ::ATOMIC_SWAPW:
7229  return emitAtomicLoadBinary(MI, MBB, 0, 0);
7230  case SystemZ::ATOMIC_SWAP_32:
7231  return emitAtomicLoadBinary(MI, MBB, 0, 32);
7232  case SystemZ::ATOMIC_SWAP_64:
7233  return emitAtomicLoadBinary(MI, MBB, 0, 64);
7234 
7235  case SystemZ::ATOMIC_LOADW_AR:
7236  return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
7237  case SystemZ::ATOMIC_LOADW_AFI:
7238  return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
7239  case SystemZ::ATOMIC_LOAD_AR:
7240  return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
7241  case SystemZ::ATOMIC_LOAD_AHI:
7242  return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
7243  case SystemZ::ATOMIC_LOAD_AFI:
7244  return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
7245  case SystemZ::ATOMIC_LOAD_AGR:
7246  return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
7247  case SystemZ::ATOMIC_LOAD_AGHI:
7248  return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
7249  case SystemZ::ATOMIC_LOAD_AGFI:
7250  return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);
7251 
7252  case SystemZ::ATOMIC_LOADW_SR:
7253  return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
7254  case SystemZ::ATOMIC_LOAD_SR:
7255  return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
7256  case SystemZ::ATOMIC_LOAD_SGR:
7257  return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);
7258 
7259  case SystemZ::ATOMIC_LOADW_NR:
7260  return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
7261  case SystemZ::ATOMIC_LOADW_NILH:
7262  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
7263  case SystemZ::ATOMIC_LOAD_NR:
7264  return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
7265  case SystemZ::ATOMIC_LOAD_NILL:
7266  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
7267  case SystemZ::ATOMIC_LOAD_NILH:
7268  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
7269  case SystemZ::ATOMIC_LOAD_NILF:
7270  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
7271  case SystemZ::ATOMIC_LOAD_NGR:
7272  return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
7273  case SystemZ::ATOMIC_LOAD_NILL64:
7274  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
7275  case SystemZ::ATOMIC_LOAD_NILH64:
7276  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
7277  case SystemZ::ATOMIC_LOAD_NIHL64:
7278  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
7279  case SystemZ::ATOMIC_LOAD_NIHH64:
7280  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
7281  case SystemZ::ATOMIC_LOAD_NILF64:
7282  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
7283  case SystemZ::ATOMIC_LOAD_NIHF64:
7284  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);
7285 
7286  case SystemZ::ATOMIC_LOADW_OR:
7287    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
7288  case SystemZ::ATOMIC_LOADW_OILH:
7289  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
7290  case SystemZ::ATOMIC_LOAD_OR:
7291    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
7292  case SystemZ::ATOMIC_LOAD_OILL:
7293  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
7294  case SystemZ::ATOMIC_LOAD_OILH:
7295  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
7296  case SystemZ::ATOMIC_LOAD_OILF:
7297  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
7298  case SystemZ::ATOMIC_LOAD_OGR:
7299  return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
7300  case SystemZ::ATOMIC_LOAD_OILL64:
7301  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
7302  case SystemZ::ATOMIC_LOAD_OILH64:
7303  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
7304  case SystemZ::ATOMIC_LOAD_OIHL64:
7305  return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
7306  case SystemZ::ATOMIC_LOAD_OIHH64:
7307  return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
7308  case SystemZ::ATOMIC_LOAD_OILF64:
7309  return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
7310  case SystemZ::ATOMIC_LOAD_OIHF64:
7311  return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);
7312 
7313  case SystemZ::ATOMIC_LOADW_XR:
7314  return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
7315  case SystemZ::ATOMIC_LOADW_XILF:
7316  return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
7317  case SystemZ::ATOMIC_LOAD_XR:
7318  return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
7319  case SystemZ::ATOMIC_LOAD_XILF:
7320  return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
7321  case SystemZ::ATOMIC_LOAD_XGR:
7322  return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
7323  case SystemZ::ATOMIC_LOAD_XILF64:
7324  return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
7325  case SystemZ::ATOMIC_LOAD_XIHF64:
7326  return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);
7327 
7328  case SystemZ::ATOMIC_LOADW_NRi:
7329  return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
7330  case SystemZ::ATOMIC_LOADW_NILHi:
7331  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
7332  case SystemZ::ATOMIC_LOAD_NRi:
7333  return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
7334  case SystemZ::ATOMIC_LOAD_NILLi:
7335  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
7336  case SystemZ::ATOMIC_LOAD_NILHi:
7337  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
7338  case SystemZ::ATOMIC_LOAD_NILFi:
7339  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
7340  case SystemZ::ATOMIC_LOAD_NGRi:
7341  return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
7342  case SystemZ::ATOMIC_LOAD_NILL64i:
7343  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
7344  case SystemZ::ATOMIC_LOAD_NILH64i:
7345  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
7346  case SystemZ::ATOMIC_LOAD_NIHL64i:
7347  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
7348  case SystemZ::ATOMIC_LOAD_NIHH64i:
7349  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
7350  case SystemZ::ATOMIC_LOAD_NILF64i:
7351  return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
7352  case SystemZ::ATOMIC_LOAD_NIHF64i:
7353  return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);
7354 
7355  case SystemZ::ATOMIC_LOADW_MIN:
7356    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
7357                                SystemZ::CCMASK_CMP_LE, 0);
7358  case SystemZ::ATOMIC_LOAD_MIN_32:
7359    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
7360                                SystemZ::CCMASK_CMP_LE, 32);
7361  case SystemZ::ATOMIC_LOAD_MIN_64:
7362    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
7363                                SystemZ::CCMASK_CMP_LE, 64);
7364 
7365  case SystemZ::ATOMIC_LOADW_MAX:
7366    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
7367                                SystemZ::CCMASK_CMP_GE, 0);
7368  case SystemZ::ATOMIC_LOAD_MAX_32:
7369    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
7370                                SystemZ::CCMASK_CMP_GE, 32);
7371  case SystemZ::ATOMIC_LOAD_MAX_64:
7372    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
7373                                SystemZ::CCMASK_CMP_GE, 64);
7374 
7375  case SystemZ::ATOMIC_LOADW_UMIN:
7376    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
7377                                SystemZ::CCMASK_CMP_LE, 0);
7378  case SystemZ::ATOMIC_LOAD_UMIN_32:
7379    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
7380                                SystemZ::CCMASK_CMP_LE, 32);
7381  case SystemZ::ATOMIC_LOAD_UMIN_64:
7382    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
7383                                SystemZ::CCMASK_CMP_LE, 64);
7384 
7385  case SystemZ::ATOMIC_LOADW_UMAX:
7386    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
7387                                SystemZ::CCMASK_CMP_GE, 0);
7388  case SystemZ::ATOMIC_LOAD_UMAX_32:
7389    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
7390                                SystemZ::CCMASK_CMP_GE, 32);
7391  case SystemZ::ATOMIC_LOAD_UMAX_64:
7392    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
7393                                SystemZ::CCMASK_CMP_GE, 64);
7394 
7395  case SystemZ::ATOMIC_CMP_SWAPW:
7396    return emitAtomicCmpSwapW(MI, MBB);
7397  case SystemZ::MVCSequence:
7398  case SystemZ::MVCLoop:
7399  return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
7400  case SystemZ::NCSequence:
7401  case SystemZ::NCLoop:
7402  return emitMemMemWrapper(MI, MBB, SystemZ::NC);
7403  case SystemZ::OCSequence:
7404  case SystemZ::OCLoop:
7405  return emitMemMemWrapper(MI, MBB, SystemZ::OC);
7406  case SystemZ::XCSequence:
7407  case SystemZ::XCLoop:
7408  return emitMemMemWrapper(MI, MBB, SystemZ::XC);
7409  case SystemZ::CLCSequence:
7410  case SystemZ::CLCLoop:
7411  return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
7412  case SystemZ::CLSTLoop:
7413  return emitStringWrapper(MI, MBB, SystemZ::CLST);
7414  case SystemZ::MVSTLoop:
7415  return emitStringWrapper(MI, MBB, SystemZ::MVST);
7416  case SystemZ::SRSTLoop:
7417  return emitStringWrapper(MI, MBB, SystemZ::SRST);
7418  case SystemZ::TBEGIN:
7419  return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
7420  case SystemZ::TBEGIN_nofloat:
7421  return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
7422  case SystemZ::TBEGINC:
7423  return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
7424  case SystemZ::LTEBRCompare_VecPseudo:
7425  return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
7426  case SystemZ::LTDBRCompare_VecPseudo:
7427  return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
7428  case SystemZ::LTXBRCompare_VecPseudo:
7429  return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
7430 
7431  case TargetOpcode::STACKMAP:
7432  case TargetOpcode::PATCHPOINT:
7433  return emitPatchPoint(MI, MBB);
7434 
7435  default:
7436  llvm_unreachable("Unexpected instr type to insert");
7437  }
7438 }
7439 
7440 // This is used only by the isel schedulers, and is needed only to prevent
7441 // the compiler from crashing when list-ilp is used.
7442 const TargetRegisterClass *
7443 SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
7444  if (VT == MVT::Untyped)
7445  return &SystemZ::ADDR128BitRegClass;
7446  return TargetLowering::getRepRegClassFor(VT);
7447 }
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
bool isMachineConstantPoolEntry() const
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
void setFrameAddressIsTaken(bool T)
uint64_t CallInst * C
const unsigned CCMASK_CMP_GT
Definition: SystemZ.h:37
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:571
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:349
X = FP_ROUND(Y, TRUNC) - Rounding &#39;Y&#39; from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:538
static MVT getIntegerVT(unsigned BitWidth)
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
const MachineInstrBuilder & add(const MachineOperand &MO) const
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:552
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:877
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:594
EVT getValueType() const
Return the ValueType of the referenced return value.
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
static void VerifyVectorTypes(const SmallVectorImpl< ISD::InputArg > &Ins)
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
bool isZero() const
Definition: APFloat.h:1143
bool isUndef() const
const unsigned CCMASK_ARITH
Definition: SystemZ.h:55
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:35
T findLastSet(T Val, ZeroBehavior ZB=ZB_Max)
Get the index of the last set bit starting from the least significant bit.
Definition: MathExtras.h:244
const GlobalValue * getGlobal() const
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.
Definition: ISDOpcodes.h:184
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1563
const unsigned PFD_READ
Definition: SystemZ.h:115
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVMContext & Context
const unsigned GR32Regs[16]
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
const int64_t CallFrameSize
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it&#39;s not CSE&#39;d)...
Definition: SelectionDAG.h:836
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:650
This class represents lattice values for constants.
Definition: AllocatorList.h:24
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static const Permute PermuteForms[]
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
const MCPhysReg ArgFPRs[NumArgFPRs]
static MVT getVectorVT(MVT VT, unsigned NumElements)
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0...
Definition: ISDOpcodes.h:605
const unsigned CCMASK_FCMP
Definition: SystemZ.h:48
const unsigned FP128Regs[16]
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:367
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:260
unsigned odd128(bool Is32bit)
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the &#39;representative&#39; register class for the specified value type.
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:519
bool isVector() const
Return true if this is a vector value type.
void addLiveIn(unsigned Reg, unsigned vreg=0)
addLiveIn - Add the specified register as a live-in.
const SDValue & getBasePtr() const
static bool isImmHH(uint64_t Val)
Definition: SystemZ.h:176
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
unsigned getReg() const
getReg - Returns the register number.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:138
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain...
Definition: ISDOpcodes.h:699
SDVTList getVTList() const
This class represents a function call, abstracting a target machine&#39;s calling convention.
unsigned Reg
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:251
const SDValue & getChain() const
static SDValue tryBuildVectorReplicate(SelectionDAG &DAG, const SystemZInstrInfo *TII, const SDLoc &DL, EVT VT, uint64_t Value, unsigned BitsPerElement)
Function Alias Analysis Results
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:321
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
Definition: ISDOpcodes.h:802
unsigned second
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:811
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:306
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:141
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
const unsigned FP32Regs[16]
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:508
SDNode * getNode() const
get the SDNode which holds the desired result
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1437
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags=0)
static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin, MachineBasicBlock::iterator MIItEnd, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
Same for subtraction.
Definition: ISDOpcodes.h:254
void reserve(size_type N)
Definition: SmallVector.h:376
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
const unsigned CCMASK_CS_EQ
Definition: SystemZ.h:67
const unsigned CCMASK_LOGICAL_CARRY
Definition: SystemZ.h:60
SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offs=0, bool isT=false, unsigned char TargetFlags=0)
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:189
static bool shouldSwapCmpOperands(const Comparison &C)
const unsigned CCMASK_ICMP
Definition: SystemZ.h:47
uint64_t High
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:781
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:435
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP)
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1509
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:159
bool isMemLoc() const
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
const unsigned PFD_WRITE
Definition: SystemZ.h:116
const unsigned VR64Regs[32]
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:210
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:136
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
Definition: ISDOpcodes.h:456
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
The address of a basic block.
Definition: Constants.h:840
bool hasOneUse() const
Return true if there is exactly one use of this node.
A description of a memory reference used in the backend.
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
void setBit(unsigned BitPosition)
Set a given bit to 1.
Definition: APInt.h:1403
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
static MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const HexagonInstrInfo * TII
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
Shift and rotation operations.
Definition: ISDOpcodes.h:410
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register...
const unsigned CCMASK_TM_MSB_0
Definition: SystemZ.h:83
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
static Optional< unsigned > getOpcode(ArrayRef< VPValue *> Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:197
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:191
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
unsigned getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
SimpleValueType SimpleTy
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:304
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt) For double-word atomic operations: ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amtLo, amtHi) ValLo, ValHi, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amtLo, amtHi) These correspond to the atomicrmw instruction.
Definition: ISDOpcodes.h:810
unsigned Intr
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:460
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
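For context, a minimal sketch of how such legalization hooks are typically invoked from a TargetLowering subclass constructor (an illustrative fragment, not a standalone program; the opcode/type pairings are arbitrary examples, though SystemZ's constructor makes similar calls, as does the sibling hook setTruncStoreAction indexed above).

  // Inside a hypothetical MyTargetLowering constructor:
  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);   // lowered by target code
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); // legalizer breaks it up
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);      // no truncating f64->f32 store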
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:401
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG...
Definition: ISDOpcodes.h:73
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
const BlockAddress * getBlockAddress() const
LocInfo getLocInfo() const
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:978
KnownBits zext(unsigned BitWidth)
Zero extends the underlying known Zero and One bits.
Definition: KnownBits.h:119
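A minimal standalone sketch of the KnownBits container (assuming LLVM's Support library; names and values are invented for the example). Zero and One are public APInt masks recording which bits are known:

  #include "llvm/Support/KnownBits.h"
  #include <cstdio>

  int main() {
    llvm::KnownBits Known(8); // 8-bit value, nothing known yet
    Known.One.setBit(0);      // bit 0 known to be 1
    Known.Zero.setBit(7);     // bit 7 known to be 0
    llvm::KnownBits Wide = Known.zext(16); // widen the Zero/One masks to 16 bits
    printf("%u -> %u\n", Known.getBitWidth(), Wide.getBitWidth()); // 8 -> 16
    printf("%d\n", Wide.One[0]); // 1: the low-order known bits carry over
    return 0;
  }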
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SmallVector< ISD::InputArg, 32 > Ins
AtomicOrdering
Atomic ordering for LLVM's memory model.
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:695
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
static bool isScalarToVector(SDValue Op)
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:298
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:398
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:478
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
SDValue getRegisterMask(const uint32_t *RegMask)
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:402
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:429
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
This contains information for each constraint that we are lowering.
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
const unsigned CCMASK_TBEGIN
Definition: SystemZ.h:92
SmallVector< ISD::OutputArg, 32 > Outs
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
const unsigned NumArgFPRs
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
KnownBits zextOrTrunc(unsigned BitWidth)
Zero extends or truncates the underlying known Zero and One bits.
Definition: KnownBits.h:131
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:852
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
static mvt_range integer_vector_valuetypes()
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:959
const unsigned CCMASK_ANY
Definition: SystemZ.h:31
virtual const TargetInstrInfo * getInstrInfo() const
#define EQ(a, b)
Definition: regexec.c:112
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:576
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:497
const unsigned VectorBits
Definition: SystemZ.h:154
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
ArrayRef< SDUse > ops() const
const unsigned CCMASK_CS_NE
Definition: SystemZ.h:68
const unsigned CCMASK_TM_SOME_0
Definition: SystemZ.h:81
const unsigned CCMASK_CS
Definition: SystemZ.h:69
Value * Callee
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Value * getOperand(unsigned i) const
Definition: User.h:170
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
const unsigned CCMASK_TM_ALL_1
Definition: SystemZ.h:80
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
UNDEF - An undefined node.
Definition: ISDOpcodes.h:178
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:524
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:327
TargetInstrInfo - Interface to description of machine instruction set.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
const unsigned CCMASK_LOGICAL
Definition: SystemZ.h:64
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const unsigned CCMASK_LOGICAL_BORROW
Definition: SystemZ.h:62
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
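An illustrative fragment of typical BuildMI use (not standalone: MBB, MI, DL, TII and DestReg are placeholders supplied by surrounding machine-level code such as a custom inserter):

  BuildMI(*MBB, MI, DL, TII->get(SystemZ::LHI), DestReg)
      .addImm(0); // DestReg = 0 via load-halfword-immediate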
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:346
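A minimal standalone sketch exercising these APInt mutators (assumes LLVM's Support library; the values are arbitrary):

  #include "llvm/ADT/APInt.h"
  #include <cstdio>

  int main() {
    llvm::APInt Val(32, 0);   // 32-bit zero
    Val.setBit(31);           // 0x80000000
    Val.lshrInPlace(4);       // logical shift right: 0x08000000
    llvm::APInt Sub(8, 0xAB); // an 8-bit pattern
    Val.insertBits(Sub, 0);   // splice into bits [0,8): 0x080000AB
    printf("0x%08llX\n", (unsigned long long)Val.getZExtValue());
    return 0;
  }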
#define P(N)
static void adjustForLTGFR(Comparison &C)
bool isNegZero() const
Definition: APFloat.h:1159
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
Return a mask of call-preserved registers for the given calling convention on the current function...
MachineInstrBundleIterator< MachineInstr > iterator
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:343
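These width predicates are constexpr; a minimal standalone sketch (assuming LLVM's Support headers), also using the isInt<32> specialization indexed further below:

  #include "llvm/Support/MathExtras.h"
  #include <cstdio>

  int main() {
    printf("%d\n", llvm::isUInt<8>(255));           // 1: fits in 8 unsigned bits
    printf("%d\n", llvm::isUInt<8>(256));           // 0: needs 9 bits
    printf("%d\n", llvm::isInt<32>(-2147483648LL)); // 1: INT32_MIN fits
    printf("%d\n", llvm::isInt<32>(2147483648LL));  // 0: one past INT32_MAX
    return 0;
  }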
const MachineRegisterInfo * MRI
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:120
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:516
Machine Value Type.
const unsigned CCMASK_VCMP
Definition: SystemZ.h:104
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
Simple binary floating point operators.
Definition: ISDOpcodes.h:283
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
const unsigned CCMASK_TM
Definition: SystemZ.h:85
bool registerDefIsDead(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Returns true if the register is dead in this machine instruction.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:273
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:66
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE...
Definition: ISDOpcodes.h:728
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const
const SDValue & getOperand(unsigned Num) const
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:934
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:332
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
const unsigned CCMASK_CMP_LE
Definition: SystemZ.h:39
static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SmallVectorImpl< SDValue > &Elems)
static ManagedStatic< OptionRegistry > OR
Definition: Options.cpp:31
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node that starts a new call frame, in which InSize bytes are set up inside ...
Definition: SelectionDAG.h:824
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
#define CONV(X)
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
static mvt_range fp_valuetypes()
This class provides iterator support for SDUse operands that use a specific SDNode.
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op)
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setImm(int64_t immVal)
const unsigned CCMASK_TM_MSB_1
Definition: SystemZ.h:84
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:767
bool definesRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr fully defines the specified register.
const unsigned CCMASK_CMP_LT
Definition: SystemZ.h:36
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
void setPrefFunctionAlignment(unsigned Align)
Set the target's preferred function alignment.
const unsigned CCMASK_3
Definition: SystemZ.h:30
static mvt_range vector_valuetypes()
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y)...
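A minimal sketch of this condition-code algebra (assumes linking against LLVM's CodeGen libraries, where these helpers are defined; it also uses getSetCCInverse, indexed further below):

  #include "llvm/CodeGen/ISDOpcodes.h"
  #include <cassert>

  int main() {
    using namespace llvm;
    // "X < Y" reads as "Y > X" once the operands are swapped:
    assert(ISD::getSetCCSwappedOperands(ISD::SETLT) == ISD::SETGT);
    // Negating a signed "less than" gives "greater or equal":
    assert(ISD::getSetCCInverse(ISD::SETLT, /*isInteger=*/true) == ISD::SETGE);
    return 0;
  }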
const unsigned CCMASK_TDC
Definition: SystemZ.h:109
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL)
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
static unsigned forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo...
Definition: ISDOpcodes.h:796
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:556
Extended Value Type.
Definition: ValueTypes.h:34
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL, CCValAssign &VA, SDValue Chain, SDValue Value)
#define OPCODE(NAME)
This structure contains all information that is necessary for lowering calls.
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask)
size_t size() const
Definition: SmallVector.h:53
static bool isImmLH(uint64_t Val)
Definition: SystemZ.h:166
T findFirstSet(T Val, ZeroBehavior ZB=ZB_Max)
Get the index of the first set bit starting from the least significant bit.
Definition: MathExtras.h:203
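A minimal standalone sketch (assuming LLVM's Support headers; the inputs are arbitrary):

  #include "llvm/Support/MathExtras.h"
  #include <cstdio>

  int main() {
    // Index of the lowest set bit, counting from bit 0:
    printf("%u\n", (unsigned)llvm::findFirstSet(0x10u)); // 4
    // Default ZB_Max behavior: an all-zero input returns the maximum
    // value of the input type rather than a valid index:
    printf("%u\n", (unsigned)llvm::findFirstSet(0u));    // 4294967295
    return 0;
  }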
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *Fast) const override
Determine if the target supports unaligned memory accesses.
This class contains a discriminated union of information about pointers in memory operands...
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasVectorEnhancements1() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
AddressingMode(bool LongDispl, bool IdxReg)
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:971
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
SDValue CreateStackTemporary(EVT VT, unsigned minAlign=1)
Create a stack temporary, suitable for holding the specified value type.
const unsigned CCMASK_TM_MIXED_MSB_0
Definition: SystemZ.h:78
unsigned first
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
void setIsKill(bool Val=true)
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:639
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:633
static bool isUndef(ArrayRef< int > Mask)
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:404
Iterator for intrusive lists based on ilist_node.
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
CCState - This class holds information needed while lowering arguments and return values...
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:309
BlockVerifier::State From
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:339
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:265
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:148
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:222
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
const unsigned CCMASK_CMP_EQ
Definition: SystemZ.h:35
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:734
CCValAssign - Represent assignment of one arg/retval to a location.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
const unsigned FP64Regs[16]
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:96
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:644
Information about stack frame layout on the target.
const DataFlowGraph & G
Definition: RDFGraph.cpp:211
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask)
static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL, CCValAssign &VA, SDValue Value)
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:710
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:413
const unsigned CCMASK_CMP_O
Definition: SystemZ.h:44
const unsigned GR128Regs[16]
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
const Constant * getConstVal() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const unsigned CCMASK_CMP_NE
Definition: SystemZ.h:38
Represents one node in the SelectionDAG.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
#define NC
Definition: regutils.h:42
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
int64_t getImm() const
const unsigned GR64Regs[16]
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:679
const Function & getFunction() const
Return the LLVM function that this machine code represents.
static mvt_range integer_valuetypes()
const unsigned CCMASK_TM_MIXED_MSB_1
Definition: SystemZ.h:79
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N)
Test whether the given value is a constant int or similar node.
EVT getMemoryVT() const
Return the type of the in-memory value.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Class for arbitrary precision integers.
Definition: APInt.h:70
CodeModel::Model getCodeModel() const
Returns the code model.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
const unsigned VectorBytes
Definition: SystemZ.h:158
iterator_range< use_iterator > uses()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:420
void setMinFunctionAlignment(unsigned Align)
Set the target's minimum function alignment (in log2(bytes))
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:468
static void VerifyVectorType(MVT VT, EVT ArgVT)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
const unsigned CCMASK_0
Definition: SystemZ.h:27
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:471
bool isTailCall() const
const unsigned CCMASK_CMP_GE
Definition: SystemZ.h:40
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:312
static MachineOperand earlyUseOperand(MachineOperand Op)
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
#define Success
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca...
Definition: ISDOpcodes.h:859
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:638
Representation of each machine instruction.
Definition: MachineInstr.h:64
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the source.
Definition: ISDOpcodes.h:724
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool killsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr kills the specified register.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:673
SmallVector< SDValue, 32 > OutVals
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
static MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:387
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:705
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getLocMemOffset() const
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:206
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:486
static bool is32Bit(EVT VT)
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:614
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
unsigned MaxStoresPerMemcpyOptSize
Maximum number of store operations that may be substituted for a call to memcpy, used for functions w...
void setStackPointerRegisterToSaveRestore(unsigned R)
If set to a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save and restore.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
const unsigned VR32Regs[32]
uint32_t Size
Definition: Profile.cpp:47
static bool isSelectPseudo(MachineInstr &MI)
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
static MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
unsigned getOpcode() const
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:608
SDValue getValue(unsigned R) const
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:346
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static unsigned reverseCCMask(unsigned CCMask)
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:749
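A minimal standalone sketch (assumes LLVM's Support headers; B is the template parameter selecting how many low bits carry the value):

  #include "llvm/Support/MathExtras.h"
  #include <cstdio>

  int main() {
    // Treat the low 16 bits as signed and widen to 64 bits:
    printf("%lld\n", (long long)llvm::SignExtend64<16>(0xFFFF)); // -1
    printf("%lld\n", (long long)llvm::SignExtend64<16>(0x7FFF)); // 32767
    return 0;
  }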
bool isReg() const
isReg - Tests if this is a MO_Register operand.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
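An illustrative fragment contrasting the two constant-node factories (not standalone: it needs a live SelectionDAG &DAG and an SDLoc DL from surrounding lowering code; getTargetConstant is indexed above):

  SDValue Zero  = DAG.getConstant(0, DL, MVT::i32);       // ordinary constant node
  SDValue TZero = DAG.getTargetConstant(0, DL, MVT::i32); // operand-form constant
                                                          // that instruction
                                                          // selection leaves as-is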
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
bool isRegLoc() const
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value *PtrVal, unsigned Alignment, AtomicOrdering Ordering, SyncScope::ID SSID)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
Definition: SelectionDAG.h:857
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
MachineConstantPoolValue * getMachineCPVal() const
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
user_iterator user_begin()
Definition: Value.h:376
void insert(iterator MBBI, MachineBasicBlock *MBB)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
void setReturnAddressIsTaken(bool s)
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
LLVM Value Representation.
Definition: Value.h:73
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:302
SDValue getRegister(unsigned Reg, EVT VT)
unsigned getResNo() const
get the index which selects a specific result in the SDNode
unsigned even128(bool Is32bit)
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override
Returns true if the target can instruction select the specified FP immediate natively.
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:59
unsigned getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
SDValue getValueType(EVT)
KnownBits sext(unsigned BitWidth)
Sign extends the underlying known Zero and One bits.
Definition: KnownBits.h:125
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:776
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone...
const unsigned CCMASK_ARITH_OVERFLOW
Definition: SystemZ.h:54
static bool isImmLL(uint64_t Val)
Definition: SystemZ.h:161
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
A SystemZ-specific constant pool value.
bool isRxSBGMask(uint64_t Mask, unsigned BitSize, unsigned &Start, unsigned &End) const
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:413
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:443
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
unsigned MaxStoresPerMemsetOptSize
Maximum number of store operations that may be substituted for the call to memset, used for functions with OptSize attribute.
const unsigned IPM_CC
Definition: SystemZ.h:112
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:198
unsigned getNumOperands() const
Conversion operators.
Definition: ISDOpcodes.h:465
const SDValue & getOperand(unsigned i) const
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
static void adjustForFNeg(Comparison &C)
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:789
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
unsigned getLocReg() const
bool isExtInLoc() const
const unsigned CCMASK_TM_ALL_0
Definition: SystemZ.h:77
uint64_t getZExtValue() const
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:474
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:126
const unsigned CCMASK_TEND
Definition: SystemZ.h:97
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
iterator end() const
Definition: StringRef.h:108
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:584
bool hasPopulationCount() const
const unsigned VR128Regs[32]
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:785
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP, bool &Invert)
const SystemZRegisterInfo * getRegisterInfo() const override
LLVMContext * getContext() const
Definition: SelectionDAG.h:407
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:375
const unsigned CCMASK_TM_SOME_1
Definition: SystemZ.h:82
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:242
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:67
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:622
const unsigned CCMASK_CMP_UO
Definition: SystemZ.h:43
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:380
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
void resize(size_type N)
Definition: SmallVector.h:351
This class is used to represent ISD::LOAD nodes.
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary...
Definition: ISDOpcodes.h:623
static bool isImmHL(uint64_t Val)
Definition: SystemZ.h:171