LLVM  8.0.1
MipsSEISelLowering.cpp
Go to the documentation of this file.
1 //===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Subclass of MipsTargetLowering specialized for mips32/64.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "MipsSEISelLowering.h"
15 #include "MipsMachineFunction.h"
16 #include "MipsRegisterInfo.h"
17 #include "MipsSubtarget.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Triple.h"
36 #include "llvm/IR/DebugLoc.h"
37 #include "llvm/IR/Intrinsics.h"
38 #include "llvm/Support/Casting.h"
40 #include "llvm/Support/Debug.h"
45 #include <algorithm>
46 #include <cassert>
47 #include <cstdint>
48 #include <iterator>
49 #include <utility>
50 
51 using namespace llvm;
52 
53 #define DEBUG_TYPE "mips-isel"
54 
55 static cl::opt<bool>
56 UseMipsTailCalls("mips-tail-calls", cl::Hidden,
57  cl::desc("MIPS: permit tail calls."), cl::init(false));
58 
59 static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
60  cl::desc("Expand double precision loads and "
61  "stores to their single precision "
62  "counterparts"));
63 
65  const MipsSubtarget &STI)
66  : MipsTargetLowering(TM, STI) {
67  // Set up the register classes
68  addRegisterClass(MVT::i32, &Mips::GPR32RegClass);
69 
70  if (Subtarget.isGP64bit())
71  addRegisterClass(MVT::i64, &Mips::GPR64RegClass);
72 
73  if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
74  // Expand all truncating stores and extending loads.
75  for (MVT VT0 : MVT::vector_valuetypes()) {
76  for (MVT VT1 : MVT::vector_valuetypes()) {
77  setTruncStoreAction(VT0, VT1, Expand);
81  }
82  }
83  }
84 
85  if (Subtarget.hasDSP()) {
87 
88  for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
89  addRegisterClass(VecTys[i], &Mips::DSPRRegClass);
90 
91  // Expand all builtin opcodes.
92  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
93  setOperationAction(Opc, VecTys[i], Expand);
94 
95  setOperationAction(ISD::ADD, VecTys[i], Legal);
96  setOperationAction(ISD::SUB, VecTys[i], Legal);
97  setOperationAction(ISD::LOAD, VecTys[i], Legal);
98  setOperationAction(ISD::STORE, VecTys[i], Legal);
100  }
101 
107 
108  if (Subtarget.hasMips32r2()) {
111  }
112  }
113 
114  if (Subtarget.hasDSPR2())
116 
117  if (Subtarget.hasMSA()) {
118  addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
119  addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
120  addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
121  addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
122  addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
123  addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
124  addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);
125 
126  // f16 is a storage-only type, always promote it to f32.
127  addRegisterClass(MVT::f16, &Mips::MSA128HRegClass);
163 
169  }
170 
171  if (!Subtarget.useSoftFloat()) {
172  addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
173 
174  // When dealing with single precision only, use libcalls
175  if (!Subtarget.isSingleFloat()) {
176  if (Subtarget.isFP64bit())
177  addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
178  else
179  addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
180  }
181  }
182 
187 
188  if (Subtarget.hasCnMips())
190  else if (Subtarget.isGP64bit())
192 
193  if (Subtarget.isGP64bit()) {
200  }
201 
204 
210 
212 
216 
217  if (NoDPLoadStore) {
220  }
221 
222  if (Subtarget.hasMips32r6()) {
223  // MIPS32r6 replaces the accumulator-based multiplies with a three register
224  // instruction
230 
231  // MIPS32r6 replaces the accumulator-based division/remainder with separate
232  // three register division and remainder instructions.
239 
240  // MIPS32r6 replaces conditional moves with an equivalent that removes the
241  // need for three GPR read ports.
245 
249 
250  assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6");
254 
256 
257  // Floating point > and >= are supported via < and <=
262 
267  }
268 
269  if (Subtarget.hasMips64r6()) {
270  // MIPS64r6 replaces the accumulator-based multiplies with a three register
271  // instruction
277 
278  // MIPS64r6 replaces the accumulator-based division/remainder with separate
279  // three register division and remainder instructions.
286 
287  // MIPS64r6 replaces conditional moves with an equivalent that removes the
288  // need for three GPR read ports.
292  }
293 
295 }
296 
297 const MipsTargetLowering *
299  const MipsSubtarget &STI) {
300  return new MipsSETargetLowering(TM, STI);
301 }
302 
303 const TargetRegisterClass *
305  if (VT == MVT::Untyped)
306  return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;
307 
309 }
310 
311 // Enable MSA support for the given integer type and Register class.
314  addRegisterClass(Ty, RC);
315 
316  // Expand all builtin opcodes.
317  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
318  setOperationAction(Opc, Ty, Expand);
319 
326 
348 
349  if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
354  }
355 
362 }
363 
364 // Enable MSA support for the given floating-point type and Register class.
367  addRegisterClass(Ty, RC);
368 
369  // Expand all builtin opcodes.
370  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
371  setOperationAction(Opc, Ty, Expand);
372 
379 
380  if (Ty != MVT::v8f16) {
392 
400  }
401 }
402 
403 SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
404  if(!Subtarget.hasMips32r6())
405  return MipsTargetLowering::LowerOperation(Op, DAG);
406 
407  EVT ResTy = Op->getValueType(0);
408  SDLoc DL(Op);
409 
410  // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the
411  // floating point register are undefined. Not really an issue as sel.d, which
412  // is produced from an FSELECT node, only looks at bit 0.
413  SDValue Tmp = DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0));
414  return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1),
415  Op->getOperand(2));
416 }
417 
418 bool
420  unsigned,
421  unsigned,
422  bool *Fast) const {
424 
426  // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
427  // implementation defined whether this is handled by hardware, software, or
428  // a hybrid of the two but it's expected that most implementations will
429  // handle the majority of cases in hardware.
430  if (Fast)
431  *Fast = true;
432  return true;
433  }
434 
435  switch (SVT) {
436  case MVT::i64:
437  case MVT::i32:
438  if (Fast)
439  *Fast = true;
440  return true;
441  default:
442  return false;
443  }
444 }
445 
447  SelectionDAG &DAG) const {
448  switch(Op.getOpcode()) {
449  case ISD::LOAD: return lowerLOAD(Op, DAG);
450  case ISD::STORE: return lowerSTORE(Op, DAG);
451  case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
452  case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
453  case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
454  case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
455  case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
456  case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
457  case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
458  DAG);
459  case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
460  case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
461  case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG);
462  case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
463  case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG);
464  case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG);
465  case ISD::SELECT: return lowerSELECT(Op, DAG);
466  }
467 
468  return MipsTargetLowering::LowerOperation(Op, DAG);
469 }
470 
471 // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
472 //
473 // Performs the following transformations:
474 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
475 // sign/zero-extension is completely overwritten by the new one performed by
476 // the ISD::AND.
477 // - Removes redundant zero extensions performed by an ISD::AND.
480  const MipsSubtarget &Subtarget) {
481  if (!Subtarget.hasMSA())
482  return SDValue();
483 
484  SDValue Op0 = N->getOperand(0);
485  SDValue Op1 = N->getOperand(1);
486  unsigned Op0Opcode = Op0->getOpcode();
487 
488  // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
489  // where $d + 1 == 2^n and n == 32
490  // or $d + 1 == 2^n and n <= 32 and ZExt
491  // -> (MipsVExtractZExt $a, $b, $c)
492  if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
493  Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
495 
496  if (!Mask)
497  return SDValue();
498 
499  int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();
500 
501  if (Log2IfPositive <= 0)
502  return SDValue(); // Mask+1 is not a power of 2
503 
504  SDValue Op0Op2 = Op0->getOperand(2);
505  EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
506  unsigned ExtendTySize = ExtendTy.getSizeInBits();
507  unsigned Log2 = Log2IfPositive;
508 
509  if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
510  Log2 == ExtendTySize) {
511  SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
512  return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0),
513  Op0->getVTList(),
514  makeArrayRef(Ops, Op0->getNumOperands()));
515  }
516  }
517 
518  return SDValue();
519 }
520 
521 // Determine if the specified node is a constant vector splat.
522 //
523 // Returns true and sets Imm if:
524 // * N is an ISD::BUILD_VECTOR representing a constant splat
525 //
526 // This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
527 // differences are that it assumes the MSA has already been checked and the
528 // arbitrary requirement for a maximum of 32-bit integers isn't applied (and
529 // must not be in order for binsri.d to be selectable).
530 static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
532 
533  if (!Node)
534  return false;
535 
536  APInt SplatValue, SplatUndef;
537  unsigned SplatBitSize;
538  bool HasAnyUndefs;
539 
540  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
541  8, !IsLittleEndian))
542  return false;
543 
544  Imm = SplatValue;
545 
546  return true;
547 }
548 
549 // Test whether the given node is an all-ones build_vector.
550 static bool isVectorAllOnes(SDValue N) {
551  // Look through bitcasts. Endianness doesn't matter because we are looking
552  // for an all-ones value.
553  if (N->getOpcode() == ISD::BITCAST)
554  N = N->getOperand(0);
555 
557 
558  if (!BVN)
559  return false;
560 
561  APInt SplatValue, SplatUndef;
562  unsigned SplatBitSize;
563  bool HasAnyUndefs;
564 
565  // Endianness doesn't matter in this context because we are looking for
566  // an all-ones value.
567  if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
568  return SplatValue.isAllOnesValue();
569 
570  return false;
571 }
572 
573 // Test whether N is the bitwise inverse of OfNode.
574 static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
575  if (N->getOpcode() != ISD::XOR)
576  return false;
577 
578  if (isVectorAllOnes(N->getOperand(0)))
579  return N->getOperand(1) == OfNode;
580 
581  if (isVectorAllOnes(N->getOperand(1)))
582  return N->getOperand(0) == OfNode;
583 
584  return false;
585 }
586 
587 // Perform combines where ISD::OR is the root node.
588 //
589 // Performs the following transformations:
590 // - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
591 // where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
592 // vector type.
595  const MipsSubtarget &Subtarget) {
596  if (!Subtarget.hasMSA())
597  return SDValue();
598 
599  EVT Ty = N->getValueType(0);
600 
601  if (!Ty.is128BitVector())
602  return SDValue();
603 
604  SDValue Op0 = N->getOperand(0);
605  SDValue Op1 = N->getOperand(1);
606 
607  if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
608  SDValue Op0Op0 = Op0->getOperand(0);
609  SDValue Op0Op1 = Op0->getOperand(1);
610  SDValue Op1Op0 = Op1->getOperand(0);
611  SDValue Op1Op1 = Op1->getOperand(1);
612  bool IsLittleEndian = !Subtarget.isLittle();
613 
614  SDValue IfSet, IfClr, Cond;
615  bool IsConstantMask = false;
616  APInt Mask, InvMask;
617 
618  // If Op0Op0 is an appropriate mask, try to find its inverse in either
619  // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while
620  // looking.
621  // IfClr will be set if we find a valid match.
622  if (isVSplat(Op0Op0, Mask, IsLittleEndian)) {
623  Cond = Op0Op0;
624  IfSet = Op0Op1;
625 
626  if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
627  Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
628  IfClr = Op1Op1;
629  else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
630  Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
631  IfClr = Op1Op0;
632 
633  IsConstantMask = true;
634  }
635 
636  // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
637  // thing again using this mask.
638  // IfClr will be set if we find a valid match.
639  if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) {
640  Cond = Op0Op1;
641  IfSet = Op0Op0;
642 
643  if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
644  Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
645  IfClr = Op1Op1;
646  else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
647  Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
648  IfClr = Op1Op0;
649 
650  IsConstantMask = true;
651  }
652 
653  // If IfClr is not yet set, try looking for a non-constant match.
654  // IfClr will be set if we find a valid match amongst the eight
655  // possibilities.
656  if (!IfClr.getNode()) {
657  if (isBitwiseInverse(Op0Op0, Op1Op0)) {
658  Cond = Op1Op0;
659  IfSet = Op1Op1;
660  IfClr = Op0Op1;
661  } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
662  Cond = Op1Op0;
663  IfSet = Op1Op1;
664  IfClr = Op0Op0;
665  } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
666  Cond = Op1Op1;
667  IfSet = Op1Op0;
668  IfClr = Op0Op1;
669  } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
670  Cond = Op1Op1;
671  IfSet = Op1Op0;
672  IfClr = Op0Op0;
673  } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
674  Cond = Op0Op0;
675  IfSet = Op0Op1;
676  IfClr = Op1Op1;
677  } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
678  Cond = Op0Op0;
679  IfSet = Op0Op1;
680  IfClr = Op1Op0;
681  } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
682  Cond = Op0Op1;
683  IfSet = Op0Op0;
684  IfClr = Op1Op1;
685  } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
686  Cond = Op0Op1;
687  IfSet = Op0Op0;
688  IfClr = Op1Op0;
689  }
690  }
691 
692  // At this point, IfClr will be set if we have a valid match.
693  if (!IfClr.getNode())
694  return SDValue();
695 
696  assert(Cond.getNode() && IfSet.getNode());
697 
698  // Fold degenerate cases.
699  if (IsConstantMask) {
700  if (Mask.isAllOnesValue())
701  return IfSet;
702  else if (Mask == 0)
703  return IfClr;
704  }
705 
706  // Transform the DAG into an equivalent VSELECT.
707  return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr);
708  }
709 
710  return SDValue();
711 }
712 
714  SelectionDAG &DAG,
715  const MipsSubtarget &Subtarget) {
716  // Estimate the number of operations the below transform will turn a
717  // constant multiply into. The number is approximately how many powers
718  // of two summed together that the constant can be broken down into.
719 
720  SmallVector<APInt, 16> WorkStack(1, C);
721  unsigned Steps = 0;
722  unsigned BitWidth = C.getBitWidth();
723 
724  while (!WorkStack.empty()) {
725  APInt Val = WorkStack.pop_back_val();
726 
727  if (Val == 0 || Val == 1)
728  continue;
729 
730  if (Val.isPowerOf2()) {
731  ++Steps;
732  continue;
733  }
734 
735  APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
736  APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
737  : APInt(BitWidth, 1) << C.ceilLogBase2();
738 
739  if ((Val - Floor).ule(Ceil - Val)) {
740  WorkStack.push_back(Floor);
741  WorkStack.push_back(Val - Floor);
742  ++Steps;
743  continue;
744  }
745 
746  WorkStack.push_back(Ceil);
747  WorkStack.push_back(Ceil - Val);
748  ++Steps;
749 
750  // If we have taken more than 12[1] / 8[2] steps to attempt the
751  // optimization for a native sized value, it is more than likely that this
752  // optimization will make things worse.
753  //
754  // [1] MIPS64 requires 6 instructions at most to materialize any constant,
755  // multiplication requires at least 4 cycles, but another cycle (or two)
756  // to retrieve the result from the HI/LO registers.
757  //
758  // [2] For MIPS32, more than 8 steps is expensive as the constant could be
759  // materialized in 2 instructions, multiplication requires at least 4
760  // cycles, but another cycle (or two) to retrieve the result from the
761  // HI/LO registers.
762 
763  if (Steps > 12 && (Subtarget.isABI_N32() || Subtarget.isABI_N64()))
764  return false;
765 
766  if (Steps > 8 && Subtarget.isABI_O32())
767  return false;
768  }
769 
770  // If the value being multiplied is not supported natively, we have to pay
771  // an additional legalization cost, conservatively assume an increase in the
772  // cost of 3 instructions per step. The values for this heuristic were
773  // determined experimentally.
774  unsigned RegisterSize = DAG.getTargetLoweringInfo()
775  .getRegisterType(*DAG.getContext(), VT)
776  .getSizeInBits();
777  Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
778  if (Steps > 27)
779  return false;
780 
781  return true;
782 }
783 
784 static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
785  EVT ShiftTy, SelectionDAG &DAG) {
786  // Return 0.
787  if (C == 0)
788  return DAG.getConstant(0, DL, VT);
789 
790  // Return x.
791  if (C == 1)
792  return X;
793 
794  // If c is power of 2, return (shl x, log2(c)).
795  if (C.isPowerOf2())
796  return DAG.getNode(ISD::SHL, DL, VT, X,
797  DAG.getConstant(C.logBase2(), DL, ShiftTy));
798 
799  unsigned BitWidth = C.getBitWidth();
800  APInt Floor = APInt(BitWidth, 1) << C.logBase2();
801  APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) :
802  APInt(BitWidth, 1) << C.ceilLogBase2();
803 
804  // If |c - floor_c| <= |c - ceil_c|,
805  // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
806  // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
807  if ((C - Floor).ule(Ceil - C)) {
808  SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
809  SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
810  return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
811  }
812 
813  // If |c - floor_c| > |c - ceil_c|,
814  // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
815  SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG);
816  SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG);
817  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
818 }
819 
822  const MipsSETargetLowering *TL,
823  const MipsSubtarget &Subtarget) {
824  EVT VT = N->getValueType(0);
825 
826  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
828  C->getAPIntValue(), VT, DAG, Subtarget))
829  return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
830  TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
831  DAG);
832 
833  return SDValue(N, 0);
834 }
835 
836 static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
837  SelectionDAG &DAG,
838  const MipsSubtarget &Subtarget) {
839  // See if this is a vector splat immediate node.
840  APInt SplatValue, SplatUndef;
841  unsigned SplatBitSize;
842  bool HasAnyUndefs;
843  unsigned EltSize = Ty.getScalarSizeInBits();
845 
846  if (!Subtarget.hasDSP())
847  return SDValue();
848 
849  if (!BV ||
850  !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
851  EltSize, !Subtarget.isLittle()) ||
852  (SplatBitSize != EltSize) ||
853  (SplatValue.getZExtValue() >= EltSize))
854  return SDValue();
855 
856  SDLoc DL(N);
857  return DAG.getNode(Opc, DL, Ty, N->getOperand(0),
858  DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32));
859 }
860 
863  const MipsSubtarget &Subtarget) {
864  EVT Ty = N->getValueType(0);
865 
866  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
867  return SDValue();
868 
869  return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
870 }
871 
872 // Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
873 // constant splats into MipsISD::SHRA_DSP for DSPr2.
874 //
875 // Performs the following transformations:
876 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
877 // sign/zero-extension is completely overwritten by the new one performed by
878 // the ISD::SRA and ISD::SHL nodes.
879 // - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
880 // sequence.
881 //
882 // See performDSPShiftCombine for more information about the transformation
883 // used for DSPr2.
886  const MipsSubtarget &Subtarget) {
887  EVT Ty = N->getValueType(0);
888 
889  if (Subtarget.hasMSA()) {
890  SDValue Op0 = N->getOperand(0);
891  SDValue Op1 = N->getOperand(1);
892 
893  // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
894  // where $d + sizeof($c) == 32
895  // or $d + sizeof($c) <= 32 and SExt
896  // -> (MipsVExtractSExt $a, $b, $c)
897  if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
898  SDValue Op0Op0 = Op0->getOperand(0);
899  ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1);
900 
901  if (!ShAmount)
902  return SDValue();
903 
904  if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
906  return SDValue();
907 
908  EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
909  unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();
910 
911  if (TotalBits == 32 ||
912  (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
913  TotalBits <= 32)) {
914  SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
915  Op0Op0->getOperand(2) };
916  return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0),
917  Op0Op0->getVTList(),
918  makeArrayRef(Ops, Op0Op0->getNumOperands()));
919  }
920  }
921  }
922 
923  if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2()))
924  return SDValue();
925 
926  return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
927 }
928 
929 
932  const MipsSubtarget &Subtarget) {
933  EVT Ty = N->getValueType(0);
934 
935  if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8))
936  return SDValue();
937 
938  return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
939 }
940 
941 static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
942  bool IsV216 = (Ty == MVT::v2i16);
943 
944  switch (CC) {
945  case ISD::SETEQ:
946  case ISD::SETNE: return true;
947  case ISD::SETLT:
948  case ISD::SETLE:
949  case ISD::SETGT:
950  case ISD::SETGE: return IsV216;
951  case ISD::SETULT:
952  case ISD::SETULE:
953  case ISD::SETUGT:
954  case ISD::SETUGE: return !IsV216;
955  default: return false;
956  }
957 }
958 
960  EVT Ty = N->getValueType(0);
961 
962  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
963  return SDValue();
964 
965  if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get()))
966  return SDValue();
967 
968  return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0),
969  N->getOperand(1), N->getOperand(2));
970 }
971 
973  EVT Ty = N->getValueType(0);
974 
975  if (Ty == MVT::v2i16 || Ty == MVT::v4i8) {
976  SDValue SetCC = N->getOperand(0);
977 
978  if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
979  return SDValue();
980 
981  return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
982  SetCC.getOperand(0), SetCC.getOperand(1),
983  N->getOperand(1), N->getOperand(2), SetCC.getOperand(2));
984  }
985 
986  return SDValue();
987 }
988 
990  const MipsSubtarget &Subtarget) {
991  EVT Ty = N->getValueType(0);
992 
993  if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
994  // Try the following combines:
995  // (xor (or $a, $b), (build_vector allones))
996  // (xor (or $a, $b), (bitcast (build_vector allones)))
997  SDValue Op0 = N->getOperand(0);
998  SDValue Op1 = N->getOperand(1);
999  SDValue NotOp;
1000 
1002  NotOp = Op1;
1003  else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
1004  NotOp = Op0;
1005  else
1006  return SDValue();
1007 
1008  if (NotOp->getOpcode() == ISD::OR)
1009  return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0),
1010  NotOp->getOperand(1));
1011  }
1012 
1013  return SDValue();
1014 }
1015 
1016 SDValue
1018  SelectionDAG &DAG = DCI.DAG;
1019  SDValue Val;
1020 
1021  switch (N->getOpcode()) {
1022  case ISD::AND:
1023  Val = performANDCombine(N, DAG, DCI, Subtarget);
1024  break;
1025  case ISD::OR:
1026  Val = performORCombine(N, DAG, DCI, Subtarget);
1027  break;
1028  case ISD::MUL:
1029  return performMULCombine(N, DAG, DCI, this, Subtarget);
1030  case ISD::SHL:
1031  Val = performSHLCombine(N, DAG, DCI, Subtarget);
1032  break;
1033  case ISD::SRA:
1034  return performSRACombine(N, DAG, DCI, Subtarget);
1035  case ISD::SRL:
1036  return performSRLCombine(N, DAG, DCI, Subtarget);
1037  case ISD::VSELECT:
1038  return performVSELECTCombine(N, DAG);
1039  case ISD::XOR:
1040  Val = performXORCombine(N, DAG, Subtarget);
1041  break;
1042  case ISD::SETCC:
1043  Val = performSETCCCombine(N, DAG);
1044  break;
1045  }
1046 
1047  if (Val.getNode()) {
1048  LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
1049  N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n";
1050  Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n");
1051  return Val;
1052  }
1053 
1055 }
1056 
1059  MachineBasicBlock *BB) const {
1060  switch (MI.getOpcode()) {
1061  default:
1063  case Mips::BPOSGE32_PSEUDO:
1064  return emitBPOSGE32(MI, BB);
1065  case Mips::SNZ_B_PSEUDO:
1066  return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
1067  case Mips::SNZ_H_PSEUDO:
1068  return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
1069  case Mips::SNZ_W_PSEUDO:
1070  return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
1071  case Mips::SNZ_D_PSEUDO:
1072  return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
1073  case Mips::SNZ_V_PSEUDO:
1074  return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
1075  case Mips::SZ_B_PSEUDO:
1076  return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
1077  case Mips::SZ_H_PSEUDO:
1078  return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
1079  case Mips::SZ_W_PSEUDO:
1080  return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
1081  case Mips::SZ_D_PSEUDO:
1082  return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
1083  case Mips::SZ_V_PSEUDO:
1084  return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
1085  case Mips::COPY_FW_PSEUDO:
1086  return emitCOPY_FW(MI, BB);
1087  case Mips::COPY_FD_PSEUDO:
1088  return emitCOPY_FD(MI, BB);
1089  case Mips::INSERT_FW_PSEUDO:
1090  return emitINSERT_FW(MI, BB);
1091  case Mips::INSERT_FD_PSEUDO:
1092  return emitINSERT_FD(MI, BB);
1093  case Mips::INSERT_B_VIDX_PSEUDO:
1094  case Mips::INSERT_B_VIDX64_PSEUDO:
1095  return emitINSERT_DF_VIDX(MI, BB, 1, false);
1096  case Mips::INSERT_H_VIDX_PSEUDO:
1097  case Mips::INSERT_H_VIDX64_PSEUDO:
1098  return emitINSERT_DF_VIDX(MI, BB, 2, false);
1099  case Mips::INSERT_W_VIDX_PSEUDO:
1100  case Mips::INSERT_W_VIDX64_PSEUDO:
1101  return emitINSERT_DF_VIDX(MI, BB, 4, false);
1102  case Mips::INSERT_D_VIDX_PSEUDO:
1103  case Mips::INSERT_D_VIDX64_PSEUDO:
1104  return emitINSERT_DF_VIDX(MI, BB, 8, false);
1105  case Mips::INSERT_FW_VIDX_PSEUDO:
1106  case Mips::INSERT_FW_VIDX64_PSEUDO:
1107  return emitINSERT_DF_VIDX(MI, BB, 4, true);
1108  case Mips::INSERT_FD_VIDX_PSEUDO:
1109  case Mips::INSERT_FD_VIDX64_PSEUDO:
1110  return emitINSERT_DF_VIDX(MI, BB, 8, true);
1111  case Mips::FILL_FW_PSEUDO:
1112  return emitFILL_FW(MI, BB);
1113  case Mips::FILL_FD_PSEUDO:
1114  return emitFILL_FD(MI, BB);
1115  case Mips::FEXP2_W_1_PSEUDO:
1116  return emitFEXP2_W_1(MI, BB);
1117  case Mips::FEXP2_D_1_PSEUDO:
1118  return emitFEXP2_D_1(MI, BB);
1119  case Mips::ST_F16:
1120  return emitST_F16_PSEUDO(MI, BB);
1121  case Mips::LD_F16:
1122  return emitLD_F16_PSEUDO(MI, BB);
1123  case Mips::MSA_FP_EXTEND_W_PSEUDO:
1124  return emitFPEXTEND_PSEUDO(MI, BB, false);
1125  case Mips::MSA_FP_ROUND_W_PSEUDO:
1126  return emitFPROUND_PSEUDO(MI, BB, false);
1127  case Mips::MSA_FP_EXTEND_D_PSEUDO:
1128  return emitFPEXTEND_PSEUDO(MI, BB, true);
1129  case Mips::MSA_FP_ROUND_D_PSEUDO:
1130  return emitFPROUND_PSEUDO(MI, BB, true);
1131  }
1132 }
1133 
1134 bool MipsSETargetLowering::isEligibleForTailCallOptimization(
1135  const CCState &CCInfo, unsigned NextStackOffset,
1136  const MipsFunctionInfo &FI) const {
1137  if (!UseMipsTailCalls)
1138  return false;
1139 
1140  // Exception has to be cleared with eret.
1141  if (FI.isISR())
1142  return false;
1143 
1144  // Return false if either the callee or caller has a byval argument.
1145  if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
1146  return false;
1147 
1148  // Return true if the callee's argument area is no larger than the
1149  // caller's.
1150  return NextStackOffset <= FI.getIncomingArgSize();
1151 }
1152 
1153 void MipsSETargetLowering::
1154 getOpndList(SmallVectorImpl<SDValue> &Ops,
1155  std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
1156  bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
1157  bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
1158  SDValue Chain) const {
1159  Ops.push_back(Callee);
1160  MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
1161  InternalLinkage, IsCallReloc, CLI, Callee,
1162  Chain);
1163 }
1164 
1165 SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1166  LoadSDNode &Nd = *cast<LoadSDNode>(Op);
1167 
1168  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
1169  return MipsTargetLowering::lowerLOAD(Op, DAG);
1170 
1171  // Replace a double precision load with two i32 loads and a buildpair64.
1172  SDLoc DL(Op);
1173  SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
1174  EVT PtrVT = Ptr.getValueType();
1175 
1176  // i32 load from lower address.
1177  SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(),
1178  Nd.getAlignment(), Nd.getMemOperand()->getFlags());
1179 
1180  // i32 load from higher address.
1181  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
1182  SDValue Hi = DAG.getLoad(
1183  MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(),
1184  std::min(Nd.getAlignment(), 4U), Nd.getMemOperand()->getFlags());
1185 
1186  if (!Subtarget.isLittle())
1187  std::swap(Lo, Hi);
1188 
1189  SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
1190  SDValue Ops[2] = {BP, Hi.getValue(1)};
1191  return DAG.getMergeValues(Ops, DL);
1192 }
1193 
1194 SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1195  StoreSDNode &Nd = *cast<StoreSDNode>(Op);
1196 
1197  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
1198  return MipsTargetLowering::lowerSTORE(Op, DAG);
1199 
1200  // Replace a double precision store with two extractelement64s and i32 stores.
1201  SDLoc DL(Op);
1202  SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
1203  EVT PtrVT = Ptr.getValueType();
1205  Val, DAG.getConstant(0, DL, MVT::i32));
1207  Val, DAG.getConstant(1, DL, MVT::i32));
1208 
1209  if (!Subtarget.isLittle())
1210  std::swap(Lo, Hi);
1211 
1212  // i32 store to lower address.
1213  Chain =
1214  DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlignment(),
1215  Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
1216 
1217  // i32 store to higher address.
1218  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
1219  return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
1220  std::min(Nd.getAlignment(), 4U),
1221  Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
1222 }
1223 
1224 SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
1225  bool HasLo, bool HasHi,
1226  SelectionDAG &DAG) const {
1227  // MIPS32r6/MIPS64r6 removed accumulator based multiplies.
1229 
1230  EVT Ty = Op.getOperand(0).getValueType();
1231  SDLoc DL(Op);
1232  SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
1233  Op.getOperand(0), Op.getOperand(1));
1234  SDValue Lo, Hi;
1235 
1236  if (HasLo)
1237  Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult);
1238  if (HasHi)
1239  Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult);
1240 
1241  if (!HasLo || !HasHi)
1242  return HasLo ? Lo : Hi;
1243 
1244  SDValue Vals[] = { Lo, Hi };
1245  return DAG.getMergeValues(Vals, DL);
1246 }
1247 
1249  SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
1250  DAG.getConstant(0, DL, MVT::i32));
1251  SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
1252  DAG.getConstant(1, DL, MVT::i32));
1253  return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi);
1254 }
1255 
1256 static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) {
1257  SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op);
1258  SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op);
1259  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
1260 }
1261 
1262 // This function expands mips intrinsic nodes which have 64-bit input operands
1263 // or output values.
1264 //
1265 // out64 = intrinsic-node in64
1266 // =>
1267 // lo = copy (extract-element (in64, 0))
1268 // hi = copy (extract-element (in64, 1))
1269 // mips-specific-node
1270 // v0 = copy lo
1271 // v1 = copy hi
1272 // out64 = merge-values (v0, v1)
1273 //
1274 static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
1275  SDLoc DL(Op);
1276  bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
1278  unsigned OpNo = 0;
1279 
1280  // See if Op has a chain input.
1281  if (HasChainIn)
1282  Ops.push_back(Op->getOperand(OpNo++));
1283 
1284  // The next operand is the intrinsic opcode.
1286 
1287  // See if the next operand has type i64.
1288  SDValue Opnd = Op->getOperand(++OpNo), In64;
1289 
1290  if (Opnd.getValueType() == MVT::i64)
1291  In64 = initAccumulator(Opnd, DL, DAG);
1292  else
1293  Ops.push_back(Opnd);
1294 
1295  // Push the remaining operands.
1296  for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
1297  Ops.push_back(Op->getOperand(OpNo));
1298 
1299  // Add In64 to the end of the list.
1300  if (In64.getNode())
1301  Ops.push_back(In64);
1302 
1303  // Scan output.
1304  SmallVector<EVT, 2> ResTys;
1305 
1306  for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
1307  I != E; ++I)
1308  ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);
1309 
1310  // Create node.
1311  SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops);
1312  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;
1313 
1314  if (!HasChainIn)
1315  return Out;
1316 
1317  assert(Val->getValueType(1) == MVT::Other);
1318  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
1319  return DAG.getMergeValues(Vals, DL);
1320 }
1321 
1322 // Lower an MSA copy intrinsic into the specified SelectionDAG node
1323 static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
1324  SDLoc DL(Op);
1325  SDValue Vec = Op->getOperand(1);
1326  SDValue Idx = Op->getOperand(2);
1327  EVT ResTy = Op->getValueType(0);
1328  EVT EltTy = Vec->getValueType(0).getVectorElementType();
1329 
1330  SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx,
1331  DAG.getValueType(EltTy));
1332 
1333  return Result;
1334 }
1335 
1336 static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
1337  EVT ResVecTy = Op->getValueType(0);
1338  EVT ViaVecTy = ResVecTy;
1339  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
1340  SDLoc DL(Op);
1341 
1342  // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
1343  // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
1344  // lanes.
1345  SDValue LaneA = Op->getOperand(OpNr);
1346  SDValue LaneB;
1347 
1348  if (ResVecTy == MVT::v2i64) {
1349  // In case of the index being passed as an immediate value, set the upper
1350  // lane to 0 so that the splati.d instruction can be matched.
1351  if (isa<ConstantSDNode>(LaneA))
1352  LaneB = DAG.getConstant(0, DL, MVT::i32);
1353  // Having the index passed in a register, set the upper lane to the same
1354  // value as the lower - this results in the BUILD_VECTOR node not being
1355  // expanded through stack. This way we are able to pattern match the set of
1356  // nodes created here to splat.d.
1357  else
1358  LaneB = LaneA;
1359  ViaVecTy = MVT::v4i32;
1360  if(BigEndian)
1361  std::swap(LaneA, LaneB);
1362  } else
1363  LaneB = LaneA;
1364 
1365  SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
1366  LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };
1367 
1368  SDValue Result = DAG.getBuildVector(
1369  ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));
1370 
1371  if (ViaVecTy != ResVecTy) {
1372  SDValue One = DAG.getConstant(1, DL, ViaVecTy);
1373  Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy,
1374  DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One));
1375  }
1376 
1377  return Result;
1378 }
1379 
1380 static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
1381  bool IsSigned = false) {
1382  return DAG.getConstant(
1384  Op->getConstantOperandVal(ImmOp), IsSigned),
1385  SDLoc(Op), Op->getValueType(0));
1386 }
1387 
1388 static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
1389  bool BigEndian, SelectionDAG &DAG) {
1390  EVT ViaVecTy = VecTy;
1391  SDValue SplatValueA = SplatValue;
1392  SDValue SplatValueB = SplatValue;
1393  SDLoc DL(SplatValue);
1394 
1395  if (VecTy == MVT::v2i64) {
1396  // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
1397  ViaVecTy = MVT::v4i32;
1398 
1399  SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
1400  SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
1401  DAG.getConstant(32, DL, MVT::i32));
1402  SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
1403  }
1404 
1405  // We currently hold the parts in little endian order. Swap them if
1406  // necessary.
1407  if (BigEndian)
1408  std::swap(SplatValueA, SplatValueB);
1409 
1410  SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
1411  SplatValueA, SplatValueB, SplatValueA, SplatValueB,
1412  SplatValueA, SplatValueB, SplatValueA, SplatValueB,
1413  SplatValueA, SplatValueB, SplatValueA, SplatValueB };
1414 
1415  SDValue Result = DAG.getBuildVector(
1416  ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));
1417 
1418  if (VecTy != ViaVecTy)
1419  Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);
1420 
1421  return Result;
1422 }
1423 
1425  unsigned Opc, SDValue Imm,
1426  bool BigEndian) {
1427  EVT VecTy = Op->getValueType(0);
1428  SDValue Exp2Imm;
1429  SDLoc DL(Op);
1430 
1431  // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it
1432  // here for now.
1433  if (VecTy == MVT::v2i64) {
1434  if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
1435  APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();
1436 
1437  SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL,
1438  MVT::i32);
1439  SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32);
1440 
1441  if (BigEndian)
1442  std::swap(BitImmLoOp, BitImmHiOp);
1443 
1444  Exp2Imm = DAG.getNode(
1445  ISD::BITCAST, DL, MVT::v2i64,
1446  DAG.getBuildVector(MVT::v4i32, DL,
1447  {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp}));
1448  }
1449  }
1450 
1451  if (!Exp2Imm.getNode()) {
1452  // We couldnt constant fold, do a vector shift instead
1453 
1454  // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
1455  // only values 0-63 are valid.
1456  if (VecTy == MVT::v2i64)
1457  Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);
1458 
1459  Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);
1460 
1461  Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy),
1462  Exp2Imm);
1463  }
1464 
1465  return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
1466 }
1467 
1469  SDLoc DL(Op);
1470  EVT ResTy = Op->getValueType(0);
1471  SDValue Vec = Op->getOperand(2);
1472  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
1473  MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32;
1474  SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1,
1475  DL, ResEltTy);
1476  SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG);
1477 
1478  return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec);
1479 }
1480 
1482  EVT ResTy = Op->getValueType(0);
1483  SDLoc DL(Op);
1484  SDValue One = DAG.getConstant(1, DL, ResTy);
1485  SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG));
1486 
1487  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
1488  DAG.getNOT(DL, Bit, ResTy));
1489 }
1490 
1492  SDLoc DL(Op);
1493  EVT ResTy = Op->getValueType(0);
1494  APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1)
1495  << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue();
1496  SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy);
1497 
1498  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
1499 }
1500 
// Lowers an INTRINSIC_WO_CHAIN node.
//
// The intrinsic ID is read from operand 0 and dispatched through one large
// switch. The visible cases either forward to lowerDSPIntr with the matching
// MipsISD opcode, or build the equivalent generic (ISD::*) or target
// (MipsISD::*) node from the intrinsic's operands, using lowerMSASplatImm to
// materialise immediate operands as vector constants. Several immediate forms
// check the immediate's range and call report_fatal_error("Immediate out of
// range") on failure. An empty SDValue is returned for unhandled intrinsics
// (the default case) and by the sat/sldi/srari/srlri cases after their range
// check.
//
// NOTE(review): this listing has gaps. The embedded line numbers skip in many
// places (for example 1509 -> 1511), and a number of `case` labels and a few
// statements are absent. Confirm against the upstream file before relying on
// which labels guard which return.
1501 SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1502  SelectionDAG &DAG) const {
1503  SDLoc DL(Op);
1504  unsigned Intrinsic = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue();
1505  switch (Intrinsic) {
1506  default:
1507  return SDValue();
1508  case Intrinsic::mips_shilo:
1509  return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
1511  return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
1513  return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
1515  return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
1517  return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
1519  return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
1521  return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
1523  return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
1525  return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
1527  return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
1528  case Intrinsic::mips_mult:
1529  return lowerDSPIntr(Op, DAG, MipsISD::Mult);
1530  case Intrinsic::mips_multu:
1531  return lowerDSPIntr(Op, DAG, MipsISD::Multu);
1532  case Intrinsic::mips_madd:
1533  return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
1534  case Intrinsic::mips_maddu:
1535  return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
1536  case Intrinsic::mips_msub:
1537  return lowerDSPIntr(Op, DAG, MipsISD::MSub);
1538  case Intrinsic::mips_msubu:
1539  return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
1544  return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
1545  Op->getOperand(2));
1550  return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
1551  lowerMSASplatImm(Op, 2, DAG));
1552  case Intrinsic::mips_and_v:
1553  return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
1554  Op->getOperand(2));
1556  return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
1557  lowerMSASplatImm(Op, 2, DAG));
1562  return lowerMSABitClear(Op, DAG);
1567  return lowerMSABitClearImm(Op, DAG);
1571  case Intrinsic::mips_binsli_d: {
1572  // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
1573  EVT VecTy = Op->getValueType(0);
1574  EVT EltTy = VecTy.getVectorElementType();
1575  if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
1576  report_fatal_error("Immediate out of range");
1578  Op->getConstantOperandVal(3) + 1);
1579  return DAG.getNode(ISD::VSELECT, DL, VecTy,
1580  DAG.getConstant(Mask, DL, VecTy, true),
1581  Op->getOperand(2), Op->getOperand(1));
1582  }
1586  case Intrinsic::mips_binsri_d: {
1587  // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
1588  EVT VecTy = Op->getValueType(0);
1589  EVT EltTy = VecTy.getVectorElementType();
1590  if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
1591  report_fatal_error("Immediate out of range");
1593  Op->getConstantOperandVal(3) + 1);
1594  return DAG.getNode(ISD::VSELECT, DL, VecTy,
1595  DAG.getConstant(Mask, DL, VecTy, true),
1596  Op->getOperand(2), Op->getOperand(1));
1597  }
1599  return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
1600  Op->getOperand(2), Op->getOperand(1));
1602  return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1603  lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2),
1604  Op->getOperand(1));
1605  case Intrinsic::mips_bmz_v:
1606  return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
1607  Op->getOperand(1), Op->getOperand(2));
1609  return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1610  lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1),
1611  Op->getOperand(2));
1615  case Intrinsic::mips_bneg_d: {
1616  EVT VecTy = Op->getValueType(0);
1617  SDValue One = DAG.getConstant(1, DL, VecTy);
1618 
1619  return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
1620  DAG.getNode(ISD::SHL, DL, VecTy, One,
1621  truncateVecElts(Op, DAG)));
1622  }
1627  return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2),
1628  !Subtarget.isLittle());
1629  case Intrinsic::mips_bnz_b:
1630  case Intrinsic::mips_bnz_h:
1631  case Intrinsic::mips_bnz_w:
1632  case Intrinsic::mips_bnz_d:
1633  return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0),
1634  Op->getOperand(1));
1635  case Intrinsic::mips_bnz_v:
1636  return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0),
1637  Op->getOperand(1));
1639  // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1640  return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1641  Op->getOperand(1), Op->getOperand(3),
1642  Op->getOperand(2));
1644  // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1645  return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1646  Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG),
1647  Op->getOperand(2));
1651  case Intrinsic::mips_bset_d: {
1652  EVT VecTy = Op->getValueType(0);
1653  SDValue One = DAG.getConstant(1, DL, VecTy);
1654 
1655  return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
1656  DAG.getNode(ISD::SHL, DL, VecTy, One,
1657  truncateVecElts(Op, DAG)));
1658  }
1663  return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2),
1664  !Subtarget.isLittle());
1665  case Intrinsic::mips_bz_b:
1666  case Intrinsic::mips_bz_h:
1667  case Intrinsic::mips_bz_w:
1668  case Intrinsic::mips_bz_d:
1669  return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0),
1670  Op->getOperand(1));
1671  case Intrinsic::mips_bz_v:
1672  return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0),
1673  Op->getOperand(1));
1674  case Intrinsic::mips_ceq_b:
1675  case Intrinsic::mips_ceq_h:
1676  case Intrinsic::mips_ceq_w:
1677  case Intrinsic::mips_ceq_d:
1678  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1679  Op->getOperand(2), ISD::SETEQ);
1684  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1685  lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ);
1690  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1691  Op->getOperand(2), ISD::SETLE);
1696  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1697  lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE);
1702  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1703  Op->getOperand(2), ISD::SETULE);
1708  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1709  lowerMSASplatImm(Op, 2, DAG), ISD::SETULE);
1714  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1715  Op->getOperand(2), ISD::SETLT);
1720  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1721  lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT);
1726  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1727  Op->getOperand(2), ISD::SETULT);
1732  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1733  lowerMSASplatImm(Op, 2, DAG), ISD::SETULT);
1739  if (Subtarget.hasMips64())
1740  // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
1742  else {
1743  // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1744  // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1745  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
1746  Op->getValueType(0), Op->getOperand(1),
1747  Op->getOperand(2));
1748  }
1754  if (Subtarget.hasMips64())
1755  // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
1757  else {
1758  // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1759  // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1760  // Note: When i64 is illegal, this results in copy_s.w instructions
1761  // instead of copy_u.w instructions. This makes no difference to the
1762  // behaviour since i64 is only illegal when the register file is 32-bit.
1763  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
1764  Op->getValueType(0), Op->getOperand(1),
1765  Op->getOperand(2));
1766  }
1771  return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1),
1772  Op->getOperand(2));
1777  return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
1778  Op->getOperand(2));
1781  // TODO: If intrinsics have fast-math-flags, propagate them.
1782  return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1),
1783  Op->getOperand(2));
1784  // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
1787  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1788  Op->getOperand(2), ISD::SETOEQ);
1791  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1792  Op->getOperand(2), ISD::SETOLE);
1795  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1796  Op->getOperand(2), ISD::SETOLT);
1799  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1800  Op->getOperand(2), ISD::SETONE);
1803  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1804  Op->getOperand(2), ISD::SETO);
1807  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1808  Op->getOperand(2), ISD::SETUEQ);
1811  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1812  Op->getOperand(2), ISD::SETULE);
1815  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1816  Op->getOperand(2), ISD::SETULT);
1819  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1820  Op->getOperand(2), ISD::SETUO);
1823  return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1824  Op->getOperand(2), ISD::SETUNE);
1827  // TODO: If intrinsics have fast-math-flags, propagate them.
1828  return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
1829  Op->getOperand(2));
1832  return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0),
1833  Op->getOperand(1));
1836  return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0),
1837  Op->getOperand(1));
1841  case Intrinsic::mips_fill_d: {
1842  EVT ResTy = Op->getValueType(0);
1844  Op->getOperand(1));
1845 
1846  // If ResTy is v2i64 then the type legalizer will break this node down into
1847  // an equivalent v4i32.
1848  return DAG.getBuildVector(ResTy, DL, Ops);
1849  }
1851  case Intrinsic::mips_fexp2_d: {
1852  // TODO: If intrinsics have fast-math-flags, propagate them.
1853  EVT ResTy = Op->getValueType(0);
1854  return DAG.getNode(
1855  ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1),
1856  DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2)));
1857  }
1860  return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1));
1863  return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0),
1864  Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
1867  // TODO: If intrinsics have fast-math-flags, propagate them.
1868  return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
1869  Op->getOperand(2));
1871  case Intrinsic::mips_fmsub_d: {
1872  // TODO: If intrinsics have fast-math-flags, propagate them.
1873  return DAG.getNode(MipsISD::FMS, SDLoc(Op), Op->getValueType(0),
1874  Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
1875  }
1878  return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1));
1881  return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
1884  // TODO: If intrinsics have fast-math-flags, propagate them.
1885  return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1),
1886  Op->getOperand(2));
1889  return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0),
1890  Op->getOperand(1));
1893  return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0),
1894  Op->getOperand(1));
1899  return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0),
1900  Op->getOperand(1), Op->getOperand(2));
1905  return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0),
1906  Op->getOperand(1), Op->getOperand(2));
1911  return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0),
1912  Op->getOperand(1), Op->getOperand(2));
1917  return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0),
1918  Op->getOperand(1), Op->getOperand(2));
1923  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
1924  Op->getOperand(1), Op->getOperand(3), Op->getOperand(2));
1928  case Intrinsic::mips_insve_d: {
1929  // Report an error for out of range values.
1930  int64_t Max;
1931  switch (Intrinsic) {
1932  case Intrinsic::mips_insve_b: Max = 15; break;
1933  case Intrinsic::mips_insve_h: Max = 7; break;
1934  case Intrinsic::mips_insve_w: Max = 3; break;
1935  case Intrinsic::mips_insve_d: Max = 1; break;
1936  default: llvm_unreachable("Unmatched intrinsic");
1937  }
1938  int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
1939  if (Value < 0 || Value > Max)
1940  report_fatal_error("Immediate out of range");
1941  return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0),
1942  Op->getOperand(1), Op->getOperand(2), Op->getOperand(3),
1943  DAG.getConstant(0, DL, MVT::i32));
1944  }
1945  case Intrinsic::mips_ldi_b:
1946  case Intrinsic::mips_ldi_h:
1947  case Intrinsic::mips_ldi_w:
1948  case Intrinsic::mips_ldi_d:
1949  return lowerMSASplatImm(Op, 1, DAG, true);
1950  case Intrinsic::mips_lsa:
1951  case Intrinsic::mips_dlsa: {
1952  EVT ResTy = Op->getValueType(0);
1953  return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
1954  DAG.getNode(ISD::SHL, SDLoc(Op), ResTy,
1955  Op->getOperand(2), Op->getOperand(3)));
1956  }
1960  case Intrinsic::mips_maddv_d: {
1961  EVT ResTy = Op->getValueType(0);
1962  return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
1963  DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
1964  Op->getOperand(2), Op->getOperand(3)));
1965  }
1970  return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
1971  Op->getOperand(1), Op->getOperand(2));
1976  return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
1977  Op->getOperand(1), Op->getOperand(2));
1982  return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
1983  Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
1988  return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
1989  Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
1994  return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
1995  Op->getOperand(1), Op->getOperand(2));
2000  return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
2001  Op->getOperand(1), Op->getOperand(2));
2006  return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
2007  Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
2012  return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
2013  Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2018  return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1),
2019  Op->getOperand(2));
2024  return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1),
2025  Op->getOperand(2));
2030  return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1),
2031  Op->getOperand(2));
2035  case Intrinsic::mips_msubv_d: {
2036  EVT ResTy = Op->getValueType(0);
2037  return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1),
2038  DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
2039  Op->getOperand(2), Op->getOperand(3)));
2040  }
2045  return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1));
2046  case Intrinsic::mips_nor_v: {
2047  SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2048  Op->getOperand(1), Op->getOperand(2));
2049  return DAG.getNOT(DL, Res, Res->getValueType(0));
2050  }
2051  case Intrinsic::mips_nori_b: {
2052  SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2053  Op->getOperand(1),
2054  lowerMSASplatImm(Op, 2, DAG));
2055  return DAG.getNOT(DL, Res, Res->getValueType(0));
2056  }
2057  case Intrinsic::mips_or_v:
2058  return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1),
2059  Op->getOperand(2));
2060  case Intrinsic::mips_ori_b:
2061  return DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2062  Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2067  return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0),
2068  Op->getOperand(1), Op->getOperand(2));
2073  return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0),
2074  Op->getOperand(1), Op->getOperand(2));
2079  return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1));
2087  case Intrinsic::mips_sat_u_d: {
2088  // Report an error for out of range values.
2089  int64_t Max;
2090  switch (Intrinsic) {
2092  case Intrinsic::mips_sat_u_b: Max = 7; break;
2094  case Intrinsic::mips_sat_u_h: Max = 15; break;
2096  case Intrinsic::mips_sat_u_w: Max = 31; break;
2098  case Intrinsic::mips_sat_u_d: Max = 63; break;
2099  default: llvm_unreachable("Unmatched intrinsic");
2100  }
2101  int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2102  if (Value < 0 || Value > Max)
2103  report_fatal_error("Immediate out of range");
2104  return SDValue();
2105  }
2106  case Intrinsic::mips_shf_b:
2107  case Intrinsic::mips_shf_h:
2108  case Intrinsic::mips_shf_w: {
2109  int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2110  if (Value < 0 || Value > 255)
2111  report_fatal_error("Immediate out of range");
2112  return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0),
2113  Op->getOperand(2), Op->getOperand(1));
2114  }
2118  case Intrinsic::mips_sldi_d: {
2119  // Report an error for out of range values.
2120  int64_t Max;
2121  switch (Intrinsic) {
2122  case Intrinsic::mips_sldi_b: Max = 15; break;
2123  case Intrinsic::mips_sldi_h: Max = 7; break;
2124  case Intrinsic::mips_sldi_w: Max = 3; break;
2125  case Intrinsic::mips_sldi_d: Max = 1; break;
2126  default: llvm_unreachable("Unmatched intrinsic");
2127  }
2128  int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue();
2129  if (Value < 0 || Value > Max)
2130  report_fatal_error("Immediate out of range");
2131  return SDValue();
2132  }
2133  case Intrinsic::mips_sll_b:
2134  case Intrinsic::mips_sll_h:
2135  case Intrinsic::mips_sll_w:
2136  case Intrinsic::mips_sll_d:
2137  return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1),
2138  truncateVecElts(Op, DAG));
2143  return DAG.getNode(ISD::SHL, DL, Op->getValueType(0),
2144  Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2149  // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
2150  // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
2151  // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
2152  // Instead we lower to MipsISD::VSHF and match from there.
2153  return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2154  lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1),
2155  Op->getOperand(1));
2160  return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2161  lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
2162  Op->getOperand(1));
2163  case Intrinsic::mips_sra_b:
2164  case Intrinsic::mips_sra_h:
2165  case Intrinsic::mips_sra_w:
2166  case Intrinsic::mips_sra_d:
2167  return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1),
2168  truncateVecElts(Op, DAG));
2173  return DAG.getNode(ISD::SRA, DL, Op->getValueType(0),
2174  Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2178  case Intrinsic::mips_srari_d: {
2179  // Report an error for out of range values.
2180  int64_t Max;
2181  switch (Intrinsic) {
2182  case Intrinsic::mips_srari_b: Max = 7; break;
2183  case Intrinsic::mips_srari_h: Max = 15; break;
2184  case Intrinsic::mips_srari_w: Max = 31; break;
2185  case Intrinsic::mips_srari_d: Max = 63; break;
2186  default: llvm_unreachable("Unmatched intrinsic");
2187  }
2188  int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2189  if (Value < 0 || Value > Max)
2190  report_fatal_error("Immediate out of range");
2191  return SDValue();
2192  }
2193  case Intrinsic::mips_srl_b:
2194  case Intrinsic::mips_srl_h:
2195  case Intrinsic::mips_srl_w:
2196  case Intrinsic::mips_srl_d:
2197  return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1),
2198  truncateVecElts(Op, DAG));
2203  return DAG.getNode(ISD::SRL, DL, Op->getValueType(0),
2204  Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2208  case Intrinsic::mips_srlri_d: {
2209  // Report an error for out of range values.
2210  int64_t Max;
2211  switch (Intrinsic) {
2212  case Intrinsic::mips_srlri_b: Max = 7; break;
2213  case Intrinsic::mips_srlri_h: Max = 15; break;
2214  case Intrinsic::mips_srlri_w: Max = 31; break;
2215  case Intrinsic::mips_srlri_d: Max = 63; break;
2216  default: llvm_unreachable("Unmatched intrinsic");
2217  }
2218  int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2219  if (Value < 0 || Value > Max)
2220  report_fatal_error("Immediate out of range");
2221  return SDValue();
2222  }
2227  return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1),
2228  Op->getOperand(2));
2233  return DAG.getNode(ISD::SUB, DL, Op->getValueType(0),
2234  Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2239  return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2240  Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
2241  case Intrinsic::mips_xor_v:
2242  return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
2243  Op->getOperand(2));
2245  return DAG.getNode(ISD::XOR, DL, Op->getValueType(0),
2246  Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2248  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2249  return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT);
2250  }
2251  }
2252 }
2253 
2255  const MipsSubtarget &Subtarget) {
2256  SDLoc DL(Op);
2257  SDValue ChainIn = Op->getOperand(0);
2258  SDValue Address = Op->getOperand(2);
2259  SDValue Offset = Op->getOperand(3);
2260  EVT ResTy = Op->getValueType(0);
2261  EVT PtrTy = Address->getValueType(0);
2262 
2263  // For N64 addresses have the underlying type MVT::i64. This intrinsic
2264  // however takes an i32 signed constant offset. The actual type of the
2265  // intrinsic is a scaled signed i10.
2266  if (Subtarget.isABI_N64())
2267  Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);
2268 
2269  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
2270  return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(),
2271  /* Alignment = */ 16);
2272 }
2273 
2274 SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2275  SelectionDAG &DAG) const {
2276  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
2277  switch (Intr) {
2278  default:
2279  return SDValue();
2280  case Intrinsic::mips_extp:
2281  return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
2283  return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
2285  return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
2287  return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
2289  return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
2291  return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
2293  return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
2295  return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
2297  return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
2299  return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
2301  return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
2303  return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
2305  return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
2307  return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
2309  return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
2311  return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
2313  return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
2315  return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
2317  return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
2319  return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
2320  case Intrinsic::mips_ld_b:
2321  case Intrinsic::mips_ld_h:
2322  case Intrinsic::mips_ld_w:
2323  case Intrinsic::mips_ld_d:
2324  return lowerMSALoadIntr(Op, DAG, Intr, Subtarget);
2325  }
2326 }
2327 
2329  const MipsSubtarget &Subtarget) {
2330  SDLoc DL(Op);
2331  SDValue ChainIn = Op->getOperand(0);
2332  SDValue Value = Op->getOperand(2);
2333  SDValue Address = Op->getOperand(3);
2334  SDValue Offset = Op->getOperand(4);
2335  EVT PtrTy = Address->getValueType(0);
2336 
2337  // For N64 addresses have the underlying type MVT::i64. This intrinsic
2338  // however takes an i32 signed constant offset. The actual type of the
2339  // intrinsic is a scaled signed i10.
2340  if (Subtarget.isABI_N64())
2341  Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);
2342 
2343  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
2344 
2345  return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(),
2346  /* Alignment = */ 16);
2347 }
2348 
2349 SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2350  SelectionDAG &DAG) const {
2351  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
2352  switch (Intr) {
2353  default:
2354  return SDValue();
2355  case Intrinsic::mips_st_b:
2356  case Intrinsic::mips_st_h:
2357  case Intrinsic::mips_st_w:
2358  case Intrinsic::mips_st_d:
2359  return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget);
2360  }
2361 }
2362 
2363 // Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
2364 //
2365 // The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
2366 // choose to sign-extend but we could have equally chosen zero-extend. The
2367 // DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
2368 // result into this node later (possibly changing it to a zero-extend in the
2369 // process).
2370 SDValue MipsSETargetLowering::
2371 lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
2372  SDLoc DL(Op);
2373  EVT ResTy = Op->getValueType(0);
2374  SDValue Op0 = Op->getOperand(0);
2375  EVT VecTy = Op0->getValueType(0);
2376 
2377  if (!VecTy.is128BitVector())
2378  return SDValue();
2379 
2380  if (ResTy.isInteger()) {
2381  SDValue Op1 = Op->getOperand(1);
2382  EVT EltTy = VecTy.getVectorElementType();
2383  return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
2384  DAG.getValueType(EltTy));
2385  }
2386 
2387  return Op;
2388 }
2389 
2390 static bool isConstantOrUndef(const SDValue Op) {
2391  if (Op->isUndef())
2392  return true;
2393  if (isa<ConstantSDNode>(Op))
2394  return true;
2395  if (isa<ConstantFPSDNode>(Op))
2396  return true;
2397  return false;
2398 }
2399 
2401  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
2402  if (isConstantOrUndef(Op->getOperand(i)))
2403  return true;
2404  return false;
2405 }
2406 
2407 // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
2408 // backend.
2409 //
2410 // Lowers according to the following rules:
2411 // - Constant splats are legal as-is as long as the SplatBitSize is a power of
2412 // 2 less than or equal to 64 and the value fits into a signed 10-bit
2413 // immediate
2414 // - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
2415 // is a power of 2 less than or equal to 64 and the value does not fit into a
2416 // signed 10-bit immediate
2417 // - Non-constant splats are legal as-is.
2418 // - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
2419 // - All others are illegal and must be expanded.
2420 SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
2421  SelectionDAG &DAG) const {
2422  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2423  EVT ResTy = Op->getValueType(0);
2424  SDLoc DL(Op);
2425  APInt SplatValue, SplatUndef;
2426  unsigned SplatBitSize;
2427  bool HasAnyUndefs;
2428 
2429  if (!Subtarget.hasMSA() || !ResTy.is128BitVector())
2430  return SDValue();
2431 
2432  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
2433  HasAnyUndefs, 8,
2434  !Subtarget.isLittle()) && SplatBitSize <= 64) {
2435  // We can only cope with 8, 16, 32, or 64-bit elements
2436  if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2437  SplatBitSize != 64)
2438  return SDValue();
2439 
2440  // If the value isn't an integer type we will have to bitcast
2441  // from an integer type first. Also, if there are any undefs, we must
2442  // lower them to defined values first.
2443  if (ResTy.isInteger() && !HasAnyUndefs)
2444  return Op;
2445 
2446  EVT ViaVecTy;
2447 
2448  switch (SplatBitSize) {
2449  default:
2450  return SDValue();
2451  case 8:
2452  ViaVecTy = MVT::v16i8;
2453  break;
2454  case 16:
2455  ViaVecTy = MVT::v8i16;
2456  break;
2457  case 32:
2458  ViaVecTy = MVT::v4i32;
2459  break;
2460  case 64:
2461  // There's no fill.d to fall back on for 64-bit values
2462  return SDValue();
2463  }
2464 
2465  // SelectionDAG::getConstant will promote SplatValue appropriately.
2466  SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2467 
2468  // Bitcast to the type we originally wanted
2469  if (ViaVecTy != ResTy)
2470  Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2471 
2472  return Result;
2473  } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false))
2474  return Op;
2475  else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
2476  // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2477  // The resulting code is the same length as the expansion, but it doesn't
2478  // use memory operations
2479  EVT ResTy = Node->getValueType(0);
2480 
2481  assert(ResTy.isVector());
2482 
2483  unsigned NumElts = ResTy.getVectorNumElements();
2484  SDValue Vector = DAG.getUNDEF(ResTy);
2485  for (unsigned i = 0; i < NumElts; ++i) {
2486  Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
2487  Node->getOperand(i),
2488  DAG.getConstant(i, DL, MVT::i32));
2489  }
2490  return Vector;
2491  }
2492 
2493  return SDValue();
2494 }
2495 
2496 // Lower VECTOR_SHUFFLE into SHF (if possible).
2497 //
2498 // SHF splits the vector into blocks of four elements, then shuffles these
2499 // elements according to a <4 x i2> constant (encoded as an integer immediate).
2500 //
2501 // It is therefore possible to lower into SHF when the mask takes the form:
2502 // <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2503 // When undef's appear they are treated as if they were whatever value is
2504 // necessary in order to fit the above forms.
2505 //
2506 // For example:
2507 // %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2508 // <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2509 // i32 7, i32 6, i32 5, i32 4>
2510 // is lowered to:
2511 // (SHF_H $w0, $w1, 27)
2512 // where the 27 comes from:
2513 // 3 + (2 << 2) + (1 << 4) + (0 << 6)
2515  SmallVector<int, 16> Indices,
2516  SelectionDAG &DAG) {
2517  int SHFIndices[4] = { -1, -1, -1, -1 };
2518 
2519  if (Indices.size() < 4)
2520  return SDValue();
2521 
2522  for (unsigned i = 0; i < 4; ++i) {
2523  for (unsigned j = i; j < Indices.size(); j += 4) {
2524  int Idx = Indices[j];
2525 
2526  // Convert from vector index to 4-element subvector index
2527  // If an index refers to an element outside of the subvector then give up
2528  if (Idx != -1) {
2529  Idx -= 4 * (j / 4);
2530  if (Idx < 0 || Idx >= 4)
2531  return SDValue();
2532  }
2533 
2534  // If the mask has an undef, replace it with the current index.
2535  // Note that it might still be undef if the current index is also undef
2536  if (SHFIndices[i] == -1)
2537  SHFIndices[i] = Idx;
2538 
2539  // Check that non-undef values are the same as in the mask. If they
2540  // aren't then give up
2541  if (!(Idx == -1 || Idx == SHFIndices[i]))
2542  return SDValue();
2543  }
2544  }
2545 
2546  // Calculate the immediate. Replace any remaining undefs with zero
2547  APInt Imm(32, 0);
2548  for (int i = 3; i >= 0; --i) {
2549  int Idx = SHFIndices[i];
2550 
2551  if (Idx == -1)
2552  Idx = 0;
2553 
2554  Imm <<= 2;
2555  Imm |= Idx & 0x3;
2556  }
2557 
2558  SDLoc DL(Op);
2559  return DAG.getNode(MipsISD::SHF, DL, ResTy,
2560  DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0));
2561 }
2562 
2563 /// Determine whether a range fits a regular pattern of values.
2564 /// This function accounts for the possibility of jumping over the End iterator.
2565 template <typename ValType>
2566 static bool
2568  unsigned CheckStride,
2570  ValType ExpectedIndex, unsigned ExpectedIndexStride) {
2571  auto &I = Begin;
2572 
2573  while (I != End) {
2574  if (*I != -1 && *I != ExpectedIndex)
2575  return false;
2576  ExpectedIndex += ExpectedIndexStride;
2577 
2578  // Incrementing past End is undefined behaviour so we must increment one
2579  // step at a time and check for End at each step.
2580  for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
2581  ; // Empty loop body.
2582  }
2583  return true;
2584 }
2585 
2586 // Determine whether VECTOR_SHUFFLE is a SPLATI.
2587 //
2588 // It is a SPLATI when the mask is:
2589 // <x, x, x, ...>
2590 // where x is any valid index.
2591 //
2592 // When undef's appear in the mask they are treated as if they were whatever
2593 // value is necessary in order to fit the above form.
2594 static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy,
2595  SmallVector<int, 16> Indices,
2596  SelectionDAG &DAG) {
2597  assert((Indices.size() % 2) == 0);
2598 
2599  int SplatIndex = -1;
2600  for (const auto &V : Indices) {
2601  if (V != -1) {
2602  SplatIndex = V;
2603  break;
2604  }
2605  }
2606 
2607  return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex,
2608  0);
2609 }
2610 
2611 // Lower VECTOR_SHUFFLE into ILVEV (if possible).
2612 //
2613 // ILVEV interleaves the even elements from each vector.
2614 //
2615 // It is possible to lower into ILVEV when the mask consists of two of the
2616 // following forms interleaved:
2617 // <0, 2, 4, ...>
2618 // <n, n+2, n+4, ...>
2619 // where n is the number of elements in the vector.
2620 // For example:
2621 // <0, 0, 2, 2, 4, 4, ...>
2622 // <0, n, 2, n+2, 4, n+4, ...>
2623 //
2624 // When undef's appear in the mask they are treated as if they were whatever
2625 // value is necessary in order to fit the above forms.
2627  SmallVector<int, 16> Indices,
2628  SelectionDAG &DAG) {
2629  assert((Indices.size() % 2) == 0);
2630 
2631  SDValue Wt;
2632  SDValue Ws;
2633  const auto &Begin = Indices.begin();
2634  const auto &End = Indices.end();
2635 
2636  // Check even elements are taken from the even elements of one half or the
2637  // other and pick an operand accordingly.
2638  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
2639  Wt = Op->getOperand(0);
2640  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2))
2641  Wt = Op->getOperand(1);
2642  else
2643  return SDValue();
2644 
2645  // Check odd elements are taken from the even elements of one half or the
2646  // other and pick an operand accordingly.
2647  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
2648  Ws = Op->getOperand(0);
2649  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2))
2650  Ws = Op->getOperand(1);
2651  else
2652  return SDValue();
2653 
2654  return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt);
2655 }
2656 
2657 // Lower VECTOR_SHUFFLE into ILVOD (if possible).
2658 //
2659 // ILVOD interleaves the odd elements from each vector.
2660 //
2661 // It is possible to lower into ILVOD when the mask consists of two of the
2662 // following forms interleaved:
2663 // <1, 3, 5, ...>
2664 // <n+1, n+3, n+5, ...>
2665 // where n is the number of elements in the vector.
2666 // For example:
2667 // <1, 1, 3, 3, 5, 5, ...>
2668 // <1, n+1, 3, n+3, 5, n+5, ...>
2669 //
2670 // When undef's appear in the mask they are treated as if they were whatever
2671 // value is necessary in order to fit the above forms.
2673  SmallVector<int, 16> Indices,
2674  SelectionDAG &DAG) {
2675  assert((Indices.size() % 2) == 0);
2676 
2677  SDValue Wt;
2678  SDValue Ws;
2679  const auto &Begin = Indices.begin();
2680  const auto &End = Indices.end();
2681 
2682  // Check even elements are taken from the odd elements of one half or the
2683  // other and pick an operand accordingly.
2684  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
2685  Wt = Op->getOperand(0);
2686  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2))
2687  Wt = Op->getOperand(1);
2688  else
2689  return SDValue();
2690 
2691  // Check odd elements are taken from the odd elements of one half or the
2692  // other and pick an operand accordingly.
2693  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
2694  Ws = Op->getOperand(0);
2695  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2))
2696  Ws = Op->getOperand(1);
2697  else
2698  return SDValue();
2699 
2700  return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws);
2701 }
2702 
2703 // Lower VECTOR_SHUFFLE into ILVR (if possible).
2704 //
2705 // ILVR interleaves consecutive elements from the right (lowest-indexed) half of
2706 // each vector.
2707 //
2708 // It is possible to lower into ILVR when the mask consists of two of the
2709 // following forms interleaved:
2710 // <0, 1, 2, ...>
2711 // <n, n+1, n+2, ...>
2712 // where n is the number of elements in the vector.
2713 // For example:
2714 // <0, 0, 1, 1, 2, 2, ...>
2715 // <0, n, 1, n+1, 2, n+2, ...>
2716 //
2717 // When undef's appear in the mask they are treated as if they were whatever
2718 // value is necessary in order to fit the above forms.
2720  SmallVector<int, 16> Indices,
2721  SelectionDAG &DAG) {
2722  assert((Indices.size() % 2) == 0);
2723 
2724  SDValue Wt;
2725  SDValue Ws;
2726  const auto &Begin = Indices.begin();
2727  const auto &End = Indices.end();
2728 
2729  // Check even elements are taken from the right (lowest-indexed) elements of
2730  // one half or the other and pick an operand accordingly.
2731  if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2732  Wt = Op->getOperand(0);
2733  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1))
2734  Wt = Op->getOperand(1);
2735  else
2736  return SDValue();
2737 
2738  // Check odd elements are taken from the right (lowest-indexed) elements of
2739  // one half or the other and pick an operand accordingly.
2740  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2741  Ws = Op->getOperand(0);
2742  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1))
2743  Ws = Op->getOperand(1);
2744  else
2745  return SDValue();
2746 
2747  return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt);
2748 }
2749 
2750 // Lower VECTOR_SHUFFLE into ILVL (if possible).
2751 //
2752 // ILVL interleaves consecutive elements from the left (highest-indexed) half
2753 // of each vector.
2754 //
2755 // It is possible to lower into ILVL when the mask consists of two of the
2756 // following forms interleaved:
2757 // <x, x+1, x+2, ...>
2758 // <n+x, n+x+1, n+x+2, ...>
2759 // where n is the number of elements in the vector and x is half n.
2760 // For example:
2761 // <x, x, x+1, x+1, x+2, x+2, ...>
2762 // <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2763 //
2764 // When undef's appear in the mask they are treated as if they were whatever
2765 // value is necessary in order to fit the above forms.
2767  SmallVector<int, 16> Indices,
2768  SelectionDAG &DAG) {
2769  assert((Indices.size() % 2) == 0);
2770 
2771  unsigned HalfSize = Indices.size() / 2;
2772  SDValue Wt;
2773  SDValue Ws;
2774  const auto &Begin = Indices.begin();
2775  const auto &End = Indices.end();
2776 
2777  // Check even elements are taken from the left (highest-indexed) elements of
2778  // one half or the other and pick an operand accordingly.
2779  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
2780  Wt = Op->getOperand(0);
2781  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1))
2782  Wt = Op->getOperand(1);
2783  else
2784  return SDValue();
2785 
2786  // Check odd elements are taken from the left (highest-indexed) elements of
2787  // one half or the other and pick an operand accordingly.
2788  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
2789  Ws = Op->getOperand(0);
2790  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize,
2791  1))
2792  Ws = Op->getOperand(1);
2793  else
2794  return SDValue();
2795 
2796  return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt);
2797 }
2798 
2799 // Lower VECTOR_SHUFFLE into PCKEV (if possible).
2800 //
2801 // PCKEV copies the even elements of each vector into the result vector.
2802 //
2803 // It is possible to lower into PCKEV when the mask consists of two of the
2804 // following forms concatenated:
2805 // <0, 2, 4, ...>
2806 // <n, n+2, n+4, ...>
2807 // where n is the number of elements in the vector.
2808 // For example:
2809 // <0, 2, 4, ..., 0, 2, 4, ...>
2810 // <0, 2, 4, ..., n, n+2, n+4, ...>
2811 //
2812 // When undef's appear in the mask they are treated as if they were whatever
2813 // value is necessary in order to fit the above forms.
2815  SmallVector<int, 16> Indices,
2816  SelectionDAG &DAG) {
2817  assert((Indices.size() % 2) == 0);
2818 
2819  SDValue Wt;
2820  SDValue Ws;
2821  const auto &Begin = Indices.begin();
2822  const auto &Mid = Indices.begin() + Indices.size() / 2;
2823  const auto &End = Indices.end();
2824 
2825  if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2826  Wt = Op->getOperand(0);
2827  else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2))
2828  Wt = Op->getOperand(1);
2829  else
2830  return SDValue();
2831 
2832  if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2833  Ws = Op->getOperand(0);
2834  else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2))
2835  Ws = Op->getOperand(1);
2836  else
2837  return SDValue();
2838 
2839  return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt);
2840 }
2841 
2842 // Lower VECTOR_SHUFFLE into PCKOD (if possible).
2843 //
2844 // PCKOD copies the odd elements of each vector into the result vector.
2845 //
2846 // It is possible to lower into PCKOD when the mask consists of two of the
2847 // following forms concatenated:
2848 // <1, 3, 5, ...>
2849 // <n+1, n+3, n+5, ...>
2850 // where n is the number of elements in the vector.
2851 // For example:
2852 // <1, 3, 5, ..., 1, 3, 5, ...>
2853 // <1, 3, 5, ..., n+1, n+3, n+5, ...>
2854 //
2855 // When undef's appear in the mask they are treated as if they were whatever
2856 // value is necessary in order to fit the above forms.
2858  SmallVector<int, 16> Indices,
2859  SelectionDAG &DAG) {
2860  assert((Indices.size() % 2) == 0);
2861 
2862  SDValue Wt;
2863  SDValue Ws;
2864  const auto &Begin = Indices.begin();
2865  const auto &Mid = Indices.begin() + Indices.size() / 2;
2866  const auto &End = Indices.end();
2867 
2868  if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2869  Wt = Op->getOperand(0);
2870  else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2))
2871  Wt = Op->getOperand(1);
2872  else
2873  return SDValue();
2874 
2875  if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2876  Ws = Op->getOperand(0);
2877  else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2))
2878  Ws = Op->getOperand(1);
2879  else
2880  return SDValue();
2881 
2882  return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt);
2883 }
2884 
2885 // Lower VECTOR_SHUFFLE into VSHF.
2886 //
2887 // This mostly consists of converting the shuffle indices in Indices into a
2888 // BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
2889 // also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
2890 // if the type is v8i16 and all the indices are less than 8 then the second
2891 // operand is unused and can be replaced with anything. We choose to replace it
2892 // with the used operand since this reduces the number of instructions overall.
2894  SmallVector<int, 16> Indices,
2895  SelectionDAG &DAG) {
2897  SDValue Op0;
2898  SDValue Op1;
2899  EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
2900  EVT MaskEltTy = MaskVecTy.getVectorElementType();
2901  bool Using1stVec = false;
2902  bool Using2ndVec = false;
2903  SDLoc DL(Op);
2904  int ResTyNumElts = ResTy.getVectorNumElements();
2905 
2906  for (int i = 0; i < ResTyNumElts; ++i) {
2907  // Idx == -1 means UNDEF
2908  int Idx = Indices[i];
2909 
2910  if (0 <= Idx && Idx < ResTyNumElts)
2911  Using1stVec = true;
2912  if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
2913  Using2ndVec = true;
2914  }
2915 
2916  for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end();
2917  ++I)
2918  Ops.push_back(DAG.getTargetConstant(*I, DL, MaskEltTy));
2919 
2920  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2921 
2922  if (Using1stVec && Using2ndVec) {
2923  Op0 = Op->getOperand(0);
2924  Op1 = Op->getOperand(1);
2925  } else if (Using1stVec)
2926  Op0 = Op1 = Op->getOperand(0);
2927  else if (Using2ndVec)
2928  Op0 = Op1 = Op->getOperand(1);
2929  else
2930  llvm_unreachable("shuffle vector mask references neither vector operand?");
2931 
2932  // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion.
2933  // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2934  // VSHF concatenates the vectors in a bitwise fashion:
2935  // <0b00, 0b01> + <0b10, 0b11> ->
2936  // 0b0100 + 0b1110 -> 0b01001110
2937  // <0b10, 0b11, 0b00, 0b01>
2938  // We must therefore swap the operands to get the correct result.
2939  return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0);
2940 }
2941 
2942 // Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
2943 // indices in the shuffle.
2944 SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2945  SelectionDAG &DAG) const {
2946  ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
2947  EVT ResTy = Op->getValueType(0);
2948 
2949  if (!ResTy.is128BitVector())
2950  return SDValue();
2951 
2952  int ResTyNumElts = ResTy.getVectorNumElements();
2953  SmallVector<int, 16> Indices;
2954 
2955  for (int i = 0; i < ResTyNumElts; ++i)
2956  Indices.push_back(Node->getMaskElt(i));
2957 
2958  // splati.[bhwd] is preferable to the others but is matched from
2959  // MipsISD::VSHF.
2960  if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
2961  return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
2962  SDValue Result;
2963  if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG)))
2964  return Result;
2965  if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG)))
2966  return Result;
2967  if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG)))
2968  return Result;
2969  if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG)))
2970  return Result;
2971  if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG)))
2972  return Result;
2973  if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG)))
2974  return Result;
2975  if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG)))
2976  return Result;
2977  return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
2978 }
2979 
2981 MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI,
2982  MachineBasicBlock *BB) const {
2983  // $bb:
2984  // bposge32_pseudo $vr0
2985  // =>
2986  // $bb:
2987  // bposge32 $tbb
2988  // $fbb:
2989  // li $vr2, 0
2990  // b $sink
2991  // $tbb:
2992  // li $vr1, 1
2993  // $sink:
2994  // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
2995 
2996  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
2998  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
2999  DebugLoc DL = MI.getDebugLoc();
3000  const BasicBlock *LLVM_BB = BB->getBasicBlock();
3002  MachineFunction *F = BB->getParent();
3003  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
3004  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
3006  F->insert(It, FBB);
3007  F->insert(It, TBB);
3008  F->insert(It, Sink);
3009 
3010  // Transfer the remainder of BB and its successor edges to Sink.
3011  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
3012  BB->end());
3014 
3015  // Add successors.
3016  BB->addSuccessor(FBB);
3017  BB->addSuccessor(TBB);
3018  FBB->addSuccessor(Sink);
3019  TBB->addSuccessor(Sink);
3020 
3021  // Insert the real bposge32 instruction to $BB.
3022  BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
3023  // Insert the real bposge32c instruction to $BB.
3024  BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB);
3025 
3026  // Fill $FBB.
3027  unsigned VR2 = RegInfo.createVirtualRegister(RC);
3028  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
3029  .addReg(Mips::ZERO).addImm(0);
3030  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
3031 
3032  // Fill $TBB.
3033  unsigned VR1 = RegInfo.createVirtualRegister(RC);
3034  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
3035  .addReg(Mips::ZERO).addImm(1);
3036 
3037  // Insert phi function to $Sink.
3038  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
3039  MI.getOperand(0).getReg())
3040  .addReg(VR2)
3041  .addMBB(FBB)
3042  .addReg(VR1)
3043  .addMBB(TBB);
3044 
3045  MI.eraseFromParent(); // The pseudo instruction is gone now.
3046  return Sink;
3047 }
3048 
3049 MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo(
3050  MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const {
3051  // $bb:
3052  // vany_nonzero $rd, $ws
3053  // =>
3054  // $bb:
3055  // bnz.b $ws, $tbb
3056  // b $fbb
3057  // $fbb:
3058  // li $rd1, 0
3059  // b $sink
3060  // $tbb:
3061  // li $rd2, 1
3062  // $sink:
3063  // $rd = phi($rd1, $fbb, $rd2, $tbb)
3064 
3065  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3067  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
3068  DebugLoc DL = MI.getDebugLoc();
3069  const BasicBlock *LLVM_BB = BB->getBasicBlock();
3071  MachineFunction *F = BB->getParent();
3072  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
3073  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
3075  F->insert(It, FBB);
3076  F->insert(It, TBB);
3077  F->insert(It, Sink);
3078 
3079  // Transfer the remainder of BB and its successor edges to Sink.
3080  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
3081  BB->end());
3083 
3084  // Add successors.
3085  BB->addSuccessor(FBB);
3086  BB->addSuccessor(TBB);
3087  FBB->addSuccessor(Sink);
3088  TBB->addSuccessor(Sink);
3089 
3090  // Insert the real bnz.b instruction to $BB.
3091  BuildMI(BB, DL, TII->get(BranchOp))
3092  .addReg(MI.getOperand(1).getReg())
3093  .addMBB(TBB);
3094 
3095  // Fill $FBB.
3096  unsigned RD1 = RegInfo.createVirtualRegister(RC);
3097  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
3098  .addReg(Mips::ZERO).addImm(0);
3099  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
3100 
3101  // Fill $TBB.
3102  unsigned RD2 = RegInfo.createVirtualRegister(RC);
3103  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
3104  .addReg(Mips::ZERO).addImm(1);
3105 
3106  // Insert phi function to $Sink.
3107  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
3108  MI.getOperand(0).getReg())
3109  .addReg(RD1)
3110  .addMBB(FBB)
3111  .addReg(RD2)
3112  .addMBB(TBB);
3113 
3114  MI.eraseFromParent(); // The pseudo instruction is gone now.
3115  return Sink;
3116 }
3117 
3118 // Emit the COPY_FW pseudo instruction.
3119 //
3120 // copy_fw_pseudo $fd, $ws, n
3121 // =>
3122 // copy_u_w $rt, $ws, $n
3123 // mtc1 $rt, $fd
3124 //
3125 // When n is zero, the equivalent operation can be performed with (potentially)
3126 // zero instructions due to register overlaps. This optimization is never valid
3127 // for lane 1 because it would require FR=0 mode which isn't supported by MSA.
3129 MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
3130  MachineBasicBlock *BB) const {
3132  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3133  DebugLoc DL = MI.getDebugLoc();
3134  unsigned Fd = MI.getOperand(0).getReg();
3135  unsigned Ws = MI.getOperand(1).getReg();
3136  unsigned Lane = MI.getOperand(2).getImm();
3137 
3138  if (Lane == 0) {
3139  unsigned Wt = Ws;
3140  if (!Subtarget.useOddSPReg()) {
3141  // We must copy to an even-numbered MSA register so that the
3142  // single-precision sub-register is also guaranteed to be even-numbered.
3143  Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);
3144 
3145  BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
3146  }
3147 
3148  BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
3149  } else {
3150  unsigned Wt = RegInfo.createVirtualRegister(
3151  Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
3152  &Mips::MSA128WEvensRegClass);
3153 
3154  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
3155  BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
3156  }
3157 
3158  MI.eraseFromParent(); // The pseudo instruction is gone now.
3159  return BB;
3160 }
3161 
3162 // Emit the COPY_FD pseudo instruction.
3163 //
3164 // copy_fd_pseudo $fd, $ws, n
3165 // =>
3166 // splati.d $wt, $ws, $n
3167 // copy $fd, $wt:sub_64
3168 //
3169 // When n is zero, the equivalent operation can be performed with (potentially)
3170 // zero instructions due to register overlaps. This optimization is always
3171 // valid because FR=1 mode is the only mode supported by MSA.
3173 MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
3174  MachineBasicBlock *BB) const {
3176 
3178  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3179  unsigned Fd = MI.getOperand(0).getReg();
3180  unsigned Ws = MI.getOperand(1).getReg();
3181  unsigned Lane = MI.getOperand(2).getImm() * 2;
3182  DebugLoc DL = MI.getDebugLoc();
3183 
3184  if (Lane == 0)
3185  BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
3186  else {
3187  unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3188 
3189  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
3190  BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
3191  }
3192 
3193  MI.eraseFromParent(); // The pseudo instruction is gone now.
3194  return BB;
3195 }
3196 
3197 // Emit the INSERT_FW pseudo instruction.
3198 //
3199 // insert_fw_pseudo $wd, $wd_in, $n, $fs
3200 // =>
3201 // subreg_to_reg $wt:sub_lo, $fs
3202 // insve_w $wd[$n], $wd_in, $wt[0]
     //
     // MI operands: 0 = $wd (result), 1 = $wd_in (input vector), 2 = n (lane
     // immediate), 3 = $fs (scalar to insert). MI is erased and BB is returned.
     //
     // NOTE(review): this listing omits the return-type line and the line that
     // declares TII (listing lines 3203 and 3206 are absent) - confirm both
     // against the upstream file.
3204 MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI,
3205  MachineBasicBlock *BB) const {
3207  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3208  DebugLoc DL = MI.getDebugLoc();
3209  unsigned Wd = MI.getOperand(0).getReg();
3210  unsigned Wd_in = MI.getOperand(1).getReg();
3211  unsigned Lane = MI.getOperand(2).getImm();
3212  unsigned Fs = MI.getOperand(3).getReg();
      // The temporary is restricted to the MSA128WEvens class when the subtarget
      // does not use odd single-precision registers.
3213  unsigned Wt = RegInfo.createVirtualRegister(
3214  Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
3215  &Mips::MSA128WEvensRegClass);
3216 
      // Place $fs in the sub_lo sub-register of the vector temporary.
3217  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3218  .addImm(0)
3219  .addReg(Fs)
3220  .addImm(Mips::sub_lo);
      // Insert element 0 of the temporary into lane n of $wd_in, producing $wd.
3221  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd)
3222  .addReg(Wd_in)
3223  .addImm(Lane)
3224  .addReg(Wt)
3225  .addImm(0);
3226 
3227  MI.eraseFromParent(); // The pseudo instruction is gone now.
3228  return BB;
3229 }
3230 
3231 // Emit the INSERT_FD pseudo instruction.
3232 //
3233 // insert_fd_pseudo $wd, $wd_in, $n, $fs
3234 // =>
3235 // subreg_to_reg $wt:sub_64, $fs
3236 // insve_d $wd[$n], $wd_in, $wt[0]
     //
     // MI operands: 0 = $wd (result), 1 = $wd_in (input vector), 2 = n (lane
     // immediate), 3 = $fs (scalar to insert). MI is erased and BB is returned.
     //
     // NOTE(review): this listing omits the return-type line and two lines at
     // the top of the body (listing lines 3237, 3240 and 3242 are absent; one of
     // them presumably declares TII) - confirm against the upstream file.
3238 MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI,
3239  MachineBasicBlock *BB) const {
3241 
3243  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3244  DebugLoc DL = MI.getDebugLoc();
3245  unsigned Wd = MI.getOperand(0).getReg();
3246  unsigned Wd_in = MI.getOperand(1).getReg();
3247  unsigned Lane = MI.getOperand(2).getImm();
3248  unsigned Fs = MI.getOperand(3).getReg();
3249  unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3250 
      // Place $fs in the sub_64 sub-register of the vector temporary.
3251  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3252  .addImm(0)
3253  .addReg(Fs)
3254  .addImm(Mips::sub_64);
      // Insert element 0 of the temporary into lane n of $wd_in, producing $wd.
3255  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
3256  .addReg(Wd_in)
3257  .addImm(Lane)
3258  .addReg(Wt)
3259  .addImm(0);
3260 
3261  MI.eraseFromParent(); // The pseudo instruction is gone now.
3262  return BB;
3263 }
3264 
3265 // Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
3266 //
3267 // For integer:
3268 // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
3269 // =>
3270 // (SLL $lanetmp1, $lane, <log2size>)
3271 // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3272 // (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
3273 // (NEG $lanetmp2, $lanetmp1)
3274 // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
3275 //
3276 // For floating point:
3277 // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
3278 // =>
3279 // (SUBREG_TO_REG $wt, $fs, <subreg>)
3280 // (SLL $lanetmp1, $lane, <log2size>)
3281 // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3282 // (INSVE_[WD], $wdtmp2, $wdtmp1, 0, $wt, 0)
3283 // (NEG $lanetmp2, $lanetmp1)
3284 // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
     //
     // MI operands: 0 = $wd (result), 1 = source vector, 2 = lane index held in
     // a register, 3 = value to insert. EltSizeInBytes must be 1, 2, 4 or 8; any
     // other value hits llvm_unreachable. IsFP selects the INSVE-based sequence.
     // MI is erased and BB is returned.
     //
     // NOTE(review): the line that declares TII is not in this listing (listing
     // line 3288 is absent) - confirm against the upstream file.
3285 MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
3286  MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
3287  bool IsFP) const {
3289  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3290  DebugLoc DL = MI.getDebugLoc();
3291  unsigned Wd = MI.getOperand(0).getReg();
3292  unsigned SrcVecReg = MI.getOperand(1).getReg();
3293  unsigned LaneReg = MI.getOperand(2).getReg();
3294  unsigned SrcValReg = MI.getOperand(3).getReg();
3295 
3296  const TargetRegisterClass *VecRC = nullptr;
3297  // FIXME: This should be true for N32 too.
      // On N64 the lane arithmetic uses 64-bit GPRs and 64-bit opcodes, and the
      // lane register is handed to SLD_B through its sub_32 sub-register.
3298  const TargetRegisterClass *GPRRC =
3299  Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3300  unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0;
3301  unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL;
3302  unsigned EltLog2Size;
3303  unsigned InsertOp = 0;
3304  unsigned InsveOp = 0;
      // Select log2 of the element size, the insert/insve opcodes and the vector
      // register class from the element size.
3305  switch (EltSizeInBytes) {
3306  default:
3307  llvm_unreachable("Unexpected size");
3308  case 1:
3309  EltLog2Size = 0;
3310  InsertOp = Mips::INSERT_B;
3311  InsveOp = Mips::INSVE_B;
3312  VecRC = &Mips::MSA128BRegClass;
3313  break;
3314  case 2:
3315  EltLog2Size = 1;
3316  InsertOp = Mips::INSERT_H;
3317  InsveOp = Mips::INSVE_H;
3318  VecRC = &Mips::MSA128HRegClass;
3319  break;
3320  case 4:
3321  EltLog2Size = 2;
3322  InsertOp = Mips::INSERT_W;
3323  InsveOp = Mips::INSVE_W;
3324  VecRC = &Mips::MSA128WRegClass;
3325  break;
3326  case 8:
3327  EltLog2Size = 3;
3328  InsertOp = Mips::INSERT_D;
3329  InsveOp = Mips::INSVE_D;
3330  VecRC = &Mips::MSA128DRegClass;
3331  break;
3332  }
3333 
3334  if (IsFP) {
      // Move the FP scalar into a vector register; from here on SrcValReg names
      // that vector register.
3335  unsigned Wt = RegInfo.createVirtualRegister(VecRC);
3336  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3337  .addImm(0)
3338  .addReg(SrcValReg)
3339  .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
3340  SrcValReg = Wt;
3341  }
3342 
3343  // Convert the lane index into a byte index
      // (skipped for byte elements, where the lane index already is a byte index)
3344  if (EltSizeInBytes != 1) {
3345  unsigned LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
3346  BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1)
3347  .addReg(LaneReg)
3348  .addImm(EltLog2Size);
3349  LaneReg = LaneTmp1;
3350  }
3351 
3352  // Rotate bytes around so that the desired lane is element zero
3353  unsigned WdTmp1 = RegInfo.createVirtualRegister(VecRC);
3354  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
3355  .addReg(SrcVecReg)
3356  .addReg(SrcVecReg)
3357  .addReg(LaneReg, 0, SubRegIdx);
3358 
3359  unsigned WdTmp2 = RegInfo.createVirtualRegister(VecRC);
3360  if (IsFP) {
3361  // Use insve.df to insert to element zero
3362  BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
3363  .addReg(WdTmp1)
3364  .addImm(0)
3365  .addReg(SrcValReg)
3366  .addImm(0);
3367  } else {
3368  // Use insert.df to insert to element zero
3369  BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2)
3370  .addReg(WdTmp1)
3371  .addReg(SrcValReg)
3372  .addImm(0);
3373  }
3374 
3375  // Rotate elements the rest of the way for a full rotation.
3376  // sld.df interprets $rt modulo the number of columns so we only need to negate
3377  // the lane index to do this.
      // The negation is emitted as a SUB/DSUB of the byte index from the zero
      // register.
3378  unsigned LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
3379  BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
3380  LaneTmp2)
3381  .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
3382  .addReg(LaneReg);
3383  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
3384  .addReg(WdTmp2)
3385  .addReg(WdTmp2)
3386  .addReg(LaneTmp2, 0, SubRegIdx);
3387 
3388  MI.eraseFromParent(); // The pseudo instruction is gone now.
3389  return BB;
3390 }
3391 
3392 // Emit the FILL_FW pseudo instruction.
3393 //
3394 // fill_fw_pseudo $wd, $fs
3395 // =>
3396 // implicit_def $wt1
3397 // insert_subreg $wt2:sub_lo, $wt1, $fs
3398 // splati.w $wd, $wt2[0]
     //
     // MI operands: 0 = $wd (result vector), 1 = $fs (scalar to replicate).
     // MI is erased and BB is returned.
     //
     // NOTE(review): this listing omits the return-type line and the line that
     // declares TII (listing lines 3399 and 3402 are absent) - confirm both
     // against the upstream file.
3400 MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
3401  MachineBasicBlock *BB) const {
3403  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3404  DebugLoc DL = MI.getDebugLoc();
3405  unsigned Wd = MI.getOperand(0).getReg();
3406  unsigned Fs = MI.getOperand(1).getReg();
      // Both temporaries are restricted to the MSA128WEvens class when the
      // subtarget does not use odd single-precision registers.
3407  unsigned Wt1 = RegInfo.createVirtualRegister(
3408  Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3409  : &Mips::MSA128WEvensRegClass);
3410  unsigned Wt2 = RegInfo.createVirtualRegister(
3411  Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3412  : &Mips::MSA128WEvensRegClass);
3413 
      // Start from an undefined vector, put $fs into its sub_lo sub-register,
      // then splat element 0 across the result.
3414  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
3415  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
3416  .addReg(Wt1)
3417  .addReg(Fs)
3418  .addImm(Mips::sub_lo);
3419  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);
3420 
3421  MI.eraseFromParent(); // The pseudo instruction is gone now.
3422  return BB;
3423 }
3424 
3425 // Emit the FILL_FD pseudo instruction.
3426 //
3427 // fill_fd_pseudo $wd, $fs
3428 // =>
3429 // implicit_def $wt1
3430 // insert_subreg $wt2:sub_64, $wt1, $fs
3431 // splati.d $wd, $wt2[0]
     //
     // MI operands: 0 = $wd (result vector), 1 = $fs (scalar to replicate).
     // MI is erased and BB is returned.
     //
     // NOTE(review): this listing omits the return-type line and two lines at
     // the top of the body (listing lines 3432, 3435 and 3437 are absent; one of
     // them presumably declares TII) - confirm against the upstream file.
3433 MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
3434  MachineBasicBlock *BB) const {
3436 
3438  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3439  DebugLoc DL = MI.getDebugLoc();
3440  unsigned Wd = MI.getOperand(0).getReg();
3441  unsigned Fs = MI.getOperand(1).getReg();
3442  unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3443  unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3444 
      // Start from an undefined vector, put $fs into its sub_64 sub-register,
      // then splat element 0 across the result.
3445  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
3446  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
3447  .addReg(Wt1)
3448  .addReg(Fs)
3449  .addImm(Mips::sub_64);
3450  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);
3451 
3452  MI.eraseFromParent(); // The pseudo instruction is gone now.
3453  return BB;
3454 }
3455 
3456 // Emit the ST_F16_PSEUDO instruction to store a f16 value from an MSA
3457 // register.
3458 //
3459 // STF16 MSA128F16:$wd, mem_simm10:$addr
3460 // =>
3461 // copy_u.h $rtemp,$wd[0]
3462 // sh $rtemp, $addr
3463 //
3464 // Safety: We can't use st.h & co as they would overwrite the memory after
3465 // the destination. It would require half floats be allocated 16 bytes(!) of
3466 // space.
     //
     // MI operands read here: 0 = MSA source register, 1 = base address register.
     // The store offset is taken from MI's first memory operand. MI is erased
     // and BB is returned.
     //
     // NOTE(review): this listing omits the return-type line and the line that
     // declares TII (listing lines 3467 and 3471 are absent) - confirm both
     // against the upstream file.
3468 MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
3469  MachineBasicBlock *BB) const {
3470 
3472  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3473  DebugLoc DL = MI.getDebugLoc();
3474  unsigned Ws = MI.getOperand(0).getReg();
      // NOTE(review): getReg() is called on operand 1 unconditionally here, while
      // the register-class selection below first checks isReg() - confirm
      // whether operand 1 can ever be a non-register.
3475  unsigned Rt = MI.getOperand(1).getReg();
3476  const MachineMemOperand &MMO = **MI.memoperands_begin();
3477  unsigned Imm = MMO.getOffset();
3478 
3479  // Caution: A load via the GOT can expand to a GPR32 operand, a load via
3480  // spill and reload can expand as a GPR64 operand. Examine the
3481  // operand in detail and default to ABI.
3482  const TargetRegisterClass *RC =
3483  MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
3484  : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
3485  : &Mips::GPR64RegClass);
3486  const bool UsingMips32 = RC == &Mips::GPR32RegClass;
3487  unsigned Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
3488 
      // Extract element 0 of the vector into a 32-bit GPR.
3489  BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
3490  if(!UsingMips32) {
      // SH64 is used below, so wrap the 32-bit value in a 64-bit register.
3491  unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass);
3492  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp)
3493  .addImm(0)
3494  .addReg(Rs)
3495  .addImm(Mips::sub_32);
3496  Rs = Tmp;
3497  }
3498  BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64))
3499  .addReg(Rs)
3500  .addReg(Rt)
3501  .addImm(Imm)
      // NOTE(review): listing line 3502 is absent; the arguments on the next line
      // presumably belong to a call that attaches a memory operand built from
      // MMO - confirm against the upstream file.
3503  &MMO, MMO.getOffset(), MMO.getSize()));
3504 
3505  MI.eraseFromParent();
3506  return BB;
3507 }
3508 
3509 // Emit the LD_F16_PSEUDO instruction to load a f16 value into an MSA register.
3510 //
3511 // LD_F16 MSA128F16:$wd, mem_simm10:$addr
3512 // =>
3513 // lh $rtemp, $addr
3514 // fill.h $wd, $rtemp
3515 //
3516 // Safety: We can't use ld.h & co as they over-read from the source.
3517 // Additionally, if the address is not modulo 16, 2 cases can occur:
3518 // a) Segmentation fault as the load instruction reads from a memory page
3519 // it's not supposed to.
3520 // b) The load crosses an implementation specific boundary, requiring OS
3521 // intervention.
     //
     // MI operand 0 is the MSA destination register; operands 1 and up are
     // copied unchanged onto the emitted LH/LH64. MI is erased and BB is
     // returned.
     //
     // NOTE(review): this listing omits the return-type line and the line that
     // declares TII (listing lines 3522 and 3526 are absent) - confirm both
     // against the upstream file.
3523 MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
3524  MachineBasicBlock *BB) const {
3525 
3527  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3528  DebugLoc DL = MI.getDebugLoc();
3529  unsigned Wd = MI.getOperand(0).getReg();
3530 
3531  // Caution: A load via the GOT can expand to a GPR32 operand, a load via
3532  // spill and reload can expand as a GPR64 operand. Examine the
3533  // operand in detail and default to ABI.
3534  const TargetRegisterClass *RC =
3535  MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
3536  : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
3537  : &Mips::GPR64RegClass);
3538 
3539  const bool UsingMips32 = RC == &Mips::GPR32RegClass;
      // The loaded value lives in a register of the same class as the address
      // operand.
3540  unsigned Rt = RegInfo.createVirtualRegister(RC);
3541 
3542  MachineInstrBuilder MIB =
3543  BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
      // Forward every operand after the destination to the load.
3544  for (unsigned i = 1; i < MI.getNumOperands(); i++)
3545  MIB.add(MI.getOperand(i));
3546 
3547  if(!UsingMips32) {
      // FILL_H below is fed a GPR32, so take the sub_32 half of the 64-bit load
      // result.
3548  unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
3549  BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32);
3550  Rt = Tmp;
3551  }
3552 
3553  BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);
3554 
3555  MI.eraseFromParent();
3556  return BB;
3557 }
3558 
3559 // Emit the FPROUND_PSEUDO instruction.
3560 //
3561 // Round an FGR64Opnd, FGR32Opnd to an f16.
3562 //
3563 // Safety: Cycle the operand through the GPRs so the result always ends up
3564 // in the correct MSA register.
3565 //
3566 // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
3567 // / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
3568 // (which they can be, as the MSA registers are defined to alias the
3569 // FPU's 64 bit and 32 bit registers) the result can be accessed using
3570 // the correct register class. That requires operands be tie-able across
3571 // register classes which have a sub/super register class relationship.
3572 //
3573 // For FGR32Opnd:
3574 //
3575 // FPROUND MSA128F16:$wd, FGR32Opnd:$fs
3576 // =>
3577 // mfc1 $rtemp, $fs
3578 // fill.w $wtemp, $rtemp
3579 // fexdo.h $wd, $wtemp, $wtemp
3580 //
3581 // For FGR64Opnd on mips32r2+:
3582 //
3583 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3584 // =>
3585 // mfc1 $rtemp, $fs
3586 // fill.w $wtemp, $rtemp
3587 // mfhc1 $rtemp2, $fs
3588 // insert.w $wtemp[1], $rtemp2
3589 // insert.w $wtemp[3], $rtemp2
3590 // fexdo.w $wtemp2, $wtemp, $wtemp
3591 // fexdo.h $wd, $wtemp2, $wtemp2
3592 //
3593 // For FGR64Opnd on mips64r2+:
3594 //
3595 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3596 // =>
3597 // dmfc1 $rtemp, $fs
3598 // fill.d $wtemp, $rtemp
3599 // fexdo.w $wtemp2, $wtemp, $wtemp
3600 // fexdo.h $wd, $wtemp2, $wtemp2
3601 //
3602 // Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
3603 // undef bits are "just right" and the exception enable bits are
3604 // set. By using fill.w to replicate $fs into all elements over
3605 // insert.w for one element, we avoid that potential case. If
3606 // fexdo.[hw] causes an exception, the exception is valid and it
3607 // occurs for all elements.
     //
     // MI operands: 0 = $wd (MSA result), 1 = $fs (FP source). IsFGR64 selects
     // the double-precision source sequences. MI is erased and BB is returned.
     //
     // NOTE(review): this listing omits the return-type line (listing line 3608)
     // and two lines inside the body (3616 and 3621; one presumably declares
     // TII, the other presumably checks the ISA requirement the comment below
     // describes) - confirm against the upstream file.
3609 MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
3610  MachineBasicBlock *BB,
3611  bool IsFGR64) const {
3612 
3613  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
3614  // here. It's technically doable to support MIPS32 here, but the ISA forbids
3615  // it.
3617 
3618  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
3619  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
3620 
3622  DebugLoc DL = MI.getDebugLoc();
3623  unsigned Wd = MI.getOperand(0).getReg();
3624  unsigned Fs = MI.getOperand(1).getReg();
3625 
3626  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3627  unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
      // Pick the GPR width, the FPR-to-GPR move and the fill opcode for the
      // source kind.
3628  const TargetRegisterClass *GPRRC =
3629  IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3630  unsigned MFC1Opc = IsFGR64onMips64
3631  ? Mips::DMFC1
3632  : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1);
3633  unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;
3634 
3635  // Perform the register class copy as mentioned above.
3636  unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC);
3637  BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs);
3638  BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
      // WPHI tracks the vector register holding the most recent intermediate.
3639  unsigned WPHI = Wtemp;
3640 
3641  if (IsFGR64onMips32) {
      // The low word was filled above; insert the high word of $fs at word
      // elements 1 and 3.
3642  unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
3643  BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
3644  unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3645  unsigned Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3646  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2)
3647  .addReg(Wtemp)
3648  .addReg(Rtemp2)
3649  .addImm(1);
3650  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3)
3651  .addReg(Wtemp2)
3652  .addReg(Rtemp2)
3653  .addImm(3);
3654  WPHI = Wtemp3;
3655  }
3656 
3657  if (IsFGR64) {
      // A 64-bit source needs an extra FEXDO_W step before the final FEXDO_H.
3658  unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3659  BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2)
3660  .addReg(WPHI)
3661  .addReg(WPHI);
3662  WPHI = Wtemp2;
3663  }
3664 
3665  BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI);
3666 
3667  MI.eraseFromParent();
3668  return BB;
3669 }
3670 
3671 // Emit the FPEXTEND_PSEUDO instruction.
3672 //
3673 // Expand an f16 to either a FGR32Opnd or FGR64Opnd.
3674 //
3675 // Safety: Cycle the result through the GPRs so the result always ends up
3676 // in the correct floating point register.
3677 //
3678 // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
3679 // / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
3680 // (which they can be, as the MSA registers are defined to alias the
3681 // FPU's 64 bit and 32 bit registers) the result can be accessed using
3682 // the correct register class. That requires operands be tie-able across
3683 // register classes which have a sub/super register class relationship. I
3684 // haven't checked.
3685 //
3686 // For FGR32Opnd:
3687 //
3688 // FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
3689 // =>
3690 // fexupr.w $wtemp, $ws
3691 // copy_s.w $rtemp, $wtemp[0]
3692 // mtc1 $rtemp, $fd
3693 //
3694 // For FGR64Opnd on Mips64:
3695 //
3696 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3697 // =>
3698 // fexupr.w $wtemp, $ws
3699 // fexupr.d $wtemp2, $wtemp
3700 // copy_s.d $rtemp, $wtemp2[0]
3701 // dmtc1 $rtemp, $fd
3702 //
3703 // For FGR64Opnd on Mips32:
3704 //
3705 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3706 // =>
3707 // fexupr.w $wtemp, $ws
3708 // fexupr.d $wtemp2, $wtemp
3709 // copy_s.w $rtemp, $wtemp2[0]
3710 // mtc1 $rtemp, $ftemp
3711 // copy_s.w $rtemp2, $wtemp2[1]
3712 // $fd = mthc1 $rtemp2, $ftemp
     //
     // MI operands: 0 = $fd (FP result), 1 = $ws (MSA source). IsFGR64 selects
     // the double-precision result sequences. MI is erased and BB is returned.
     //
     // NOTE(review): this listing omits the return-type line (listing line 3713)
     // and two lines inside the body (3721 and 3726; one presumably declares
     // TII, the other presumably checks the ISA requirement the comment below
     // describes) - confirm against the upstream file.
3714 MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
3715  MachineBasicBlock *BB,
3716  bool IsFGR64) const {
3717 
3718  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
3719  // here. It's technically doable to support MIPS32 here, but the ISA forbids
3720  // it.
3722 
3723  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
3724  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
3725 
3727  DebugLoc DL = MI.getDebugLoc();
3728  unsigned Fd = MI.getOperand(0).getReg();
3729  unsigned Ws = MI.getOperand(1).getReg();
3730 
3731  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
      // Pick the GPR width, the GPR-to-FPR move and the element copy opcode for
      // the result kind.
3732  const TargetRegisterClass *GPRRC =
3733  IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3734  unsigned MTC1Opc = IsFGR64onMips64
3735  ? Mips::DMTC1
3736  : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1);
3737  unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;
3738 
3739  unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
      // WPHI names the vector register that holds the fully widened value.
3740  unsigned WPHI = Wtemp;
3741 
3742  BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws);
3743  if (IsFGR64) {
3744  WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3745  BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp);
3746  }
3747 
3748  // Perform the safety regclass copy mentioned above.
3749  unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC);
      // On Mips32 with a 64-bit result the low half goes to a temporary FGR64
      // first; $fd itself is defined by the MTHC1_D64 below.
3750  unsigned FPRPHI = IsFGR64onMips32
3751  ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
3752  : Fd;
3753  BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
3754  BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);
3755 
3756  if (IsFGR64onMips32) {
      // Fetch word element 1 (the high half) and merge it into the result.
3757  unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
3758  BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
3759  .addReg(WPHI)
3760  .addImm(1);
3761  BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd)
3762  .addReg(FPRPHI)
3763  .addReg(Rtemp2);
3764  }
3765 
3766  MI.eraseFromParent();
3767  return BB;
3768 }
3769 
3770 // Emit the FEXP2_W_1 pseudo instructions.
3771 //
3772 // fexp2_w_1_pseudo $wd, $wt
3773 // =>
3774 // ldi.w $ws1, 1
     // ffint_u.w $ws2, $ws1
3775 // fexp2.w $wd, $ws2, $wt
     //
     // MI operands: 0 = $wd (result), 1 = $wt (exponent vector). MI is erased
     // and BB is returned.
     //
     // NOTE(review): this listing omits the return-type line and the line that
     // declares TII (listing lines 3776 and 3779 are absent) - confirm both
     // against the upstream file.
3777 MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI,
3778  MachineBasicBlock *BB) const {
3780  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3781  const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
3782  unsigned Ws1 = RegInfo.createVirtualRegister(RC);
3783  unsigned Ws2 = RegInfo.createVirtualRegister(RC);
3784  DebugLoc DL = MI.getDebugLoc();
3785 
3786  // Splat 1.0 into a vector
      // (integer 1 is loaded with LDI_W, then converted with FFINT_U_W)
3787  BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1);
3788  BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1);
3789 
3790  // Emit 1.0 * fexp2(Wt)
3791  BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg())
3792  .addReg(Ws2)
3793  .addReg(MI.getOperand(1).getReg());
3794 
3795  MI.eraseFromParent(); // The pseudo instruction is gone now.
3796  return BB;
3797 }
3798 
3799 // Emit the FEXP2_D_1 pseudo instructions.
3800 //
3801 // fexp2_d_1_pseudo $wd, $wt
3802 // =>
3803 // ldi.d $ws1, 1
     // ffint_u.d $ws2, $ws1
3804 // fexp2.d $wd, $ws2, $wt
     //
     // MI operands: 0 = $wd (result), 1 = $wt (exponent vector). MI is erased
     // and BB is returned.
     //
     // NOTE(review): this listing omits the return-type line and the line that
     // declares TII (listing lines 3805 and 3808 are absent) - confirm both
     // against the upstream file.
3806 MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI,
3807  MachineBasicBlock *BB) const {
3809  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3810  const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
3811  unsigned Ws1 = RegInfo.createVirtualRegister(RC);
3812  unsigned Ws2 = RegInfo.createVirtualRegister(RC);
3813  DebugLoc DL = MI.getDebugLoc();
3814 
3815  // Splat 1.0 into a vector
      // (integer 1 is loaded with LDI_D, then converted with FFINT_U_D)
3816  BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1);
3817  BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1);
3818 
3819  // Emit 1.0 * fexp2(Wt)
3820  BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg())
3821  .addReg(Ws2)
3822  .addReg(MI.getOperand(1).getReg());
3823 
3824  MI.eraseFromParent(); // The pseudo instruction is gone now.
3825  return BB;
3826 }
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
const MipsTargetLowering * createMipsSETargetLowering(const MipsTargetMachine &TM, const MipsSubtarget &STI)
uint64_t CallInst * C
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:571
static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, EVT ShiftTy, SelectionDAG &DAG)
const MachineInstrBuilder & add(const MachineOperand &MO) const
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:877
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:594
EVT getValueType() const
Return the ValueType of the referenced return value.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.
Definition: ISDOpcodes.h:184
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1563
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:937
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:328
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
const MipsSubtarget & Subtarget
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector...
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:650
This class represents lattice values for constants.
Definition: AllocatorList.h:24
value_iterator value_end() const
static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG)
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS=0, unsigned Align=1, bool *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG)
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0...
Definition: ISDOpcodes.h:605
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:367
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:260
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
const SDValue & getBasePtr() const
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:223
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
unsigned getReg() const
getReg - Returns the register number.
const SDValue & getValue() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
SDVTList getVTList() const
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:648
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
const MipsInstrInfo * getInstrInfo() const override
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
const SDValue & getChain() const
uint64_t getSize() const
Return the size in bytes of the memory reference.
bool isABI_O32() const
unsigned getAlignment() const
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:811
A debug info location.
Definition: DebugLoc.h:34
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:141
F(f)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:384
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:781
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:435
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1509
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
void printrWithDepth(raw_ostream &O, const SelectionDAG *G=nullptr, unsigned depth=100) const
Print a SelectionDAG node and children up to depth "depth." The given SelectionDAG allows target-spec...
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:159
const Triple & getTargetTriple() const
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
bool hasMips64() const
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:210
bool hasDSPR2() const
static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, const MipsSubtarget &Subtarget)
A description of a memory reference used in the backend.
const HexagonInstrInfo * TII
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
Shift and rotation operations.
Definition: ISDOpcodes.h:410
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:412
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:191
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getScalarValueSizeInBits() const
bool isGP64bit() const
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
SimpleValueType SimpleTy
static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG)
bool hasMips32r6() const
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op)
unsigned Intr
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:401
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
This file implements a class to represent arbitrary precision integral constant values and operations...
static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:298
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian)
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:478
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:429
static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
static bool isConstantOrUndef(const SDValue Op)
unsigned getIncomingArgSize() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:852
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:959
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification, or lowering of the constant.
Definition: ISDOpcodes.h:125
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:576
static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
amdgpu Simplify well known AMD library false Value * Callee
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
This class is used to represent ISD::STORE nodes.
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:524
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:327
TargetInstrInfo - Interface to description of machine instruction set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:636
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
virtual void getOpndList(SmallVectorImpl< SDValue > &Ops, std::deque< std::pair< unsigned, SDValue >> &RegsToPass, bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const
This function fills Ops, which is the list of operands that will later be used when a function call n...
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1166
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:364
const SDValue & getBasePtr() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:166
bool isAllOnesValue() const
Determine if all bits are set.
Definition: APInt.h:396
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc)
Machine Value Type.
bool isLittleEndian() const
Tests whether the target triple is little endian.
Definition: Triple.cpp:1493
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, const MipsSubtarget &Subtarget)
static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, bool BigEndian, SelectionDAG &DAG)
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
Simple binary floating point operators.
Definition: ISDOpcodes.h:283
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC)
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:273
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
const SDValue & getOperand(unsigned Num) const
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:332
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:232
static bool isVectorAllOnes(SDValue N)
static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
const APInt & getAPIntValue() const
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static mvt_range vector_valuetypes()
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:556
SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
Extended Value Type.
Definition: ValueTypes.h:34
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
bool isSingleFloat() const
unsigned ceilLogBase2() const
Definition: APInt.h:1751
This structure contains all information that is necessary for lowering calls.
size_t size() const
Definition: SmallVector.h:53
bool hasMips64r6() const
This class contains a discriminated union of information about pointers in memory operands...
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isABI_N32() const
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:971
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:404
Iterator for intrusive lists based on ilist_node.
CCState - This class holds information needed while lowering arguments and return values...
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG)
static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
bool isLittle() const
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:534
const MipsRegisterInfo * getRegisterInfo() const override
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:339
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:265
double Log2(double Value)
Return the log base 2 of the specified value.
Definition: MathExtras.h:528
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:222
value_iterator value_begin() const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:734
static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG)
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:1044
bool hasCnMips() const
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:96
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:644
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc)
Represents one node in the SelectionDAG.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
int64_t getImm() const
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
bool hasDSP() const
unsigned logBase2() const
Definition: APInt.h:1748
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG)
EVT getMemoryVT() const
Return the type of the in-memory value.
Class for arbitrary precision integers.
Definition: APInt.h:70
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:420
static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:464
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:468
static cl::opt< bool > NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), cl::desc("Expand double precision loads and " "stores to their single precision " "counterparts"))
static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:312
int getMaskElt(unsigned Idx) const
static bool isBitwiseInverse(SDValue N, SDValue OfNode)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:387
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:206
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:182
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:614
const TargetRegisterClass * getRepRegClassFor(MVT VT) const override
Return the 'representative' register class for the specified value type.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:403
Flags getFlags() const
Return the raw flags of the source value.
SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc, SDValue Imm, bool BigEndian)
MipsFunctionInfo - This class is derived from MachineFunction private Mips target-specific informatio...
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
bool useOddSPReg() const
static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
unsigned getOpcode() const
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:608
SDValue getValue(unsigned R) const
unsigned getInRegsParamsCount() const
bool hasMips32r2() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
bool isABI_N64() const
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts)
Test whether V has a splatted value for all the demanded elements.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
LLVM Value Representation.
Definition: Value.h:73
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:302
unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
SDValue getValueType(EVT)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
bool isUndef() const
Return true if the node type is undefined.
static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
bool useSoftFloat() const
MipsSETargetLowering(const MipsTargetMachine &TM, const MipsSubtarget &STI)
IRTranslator LLVM IR MI
static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > UseMipsTailCalls("mips-tail-calls", cl::Hidden, cl::desc("MIPS: permit tail calls."), cl::init(false))
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:443
Conversion operators.
Definition: ISDOpcodes.h:465
const SDValue & getOperand(unsigned i) const
bool systemSupportsUnalignedAccess() const
Does the system support unaligned memory access.
void addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC)
Enable MSA support for the given integer type and Register class.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
uint64_t getZExtValue() const
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:474
#define LLVM_DEBUG(X)
Definition: Debug.h:123
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:584
bool isFP64bit() const
LLVMContext * getContext() const
Definition: SelectionDAG.h:407
static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
void addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC)
Enable MSA support for the given floating-point type and Register class.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool hasMSA() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override
EVT is not used in-tree, but is used by out-of-tree target.
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:380
This class is used to represent ISD::LOAD nodes.