AArch64ISelDAGToDAG.cpp (LLVM 8.0.1)
1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the AArch64 target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64TargetMachine.h"
15 #include "MCTargetDesc/AArch64AddressingModes.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/CodeGen/SelectionDAGISel.h"
18 #include "llvm/IR/Function.h" // To access function attributes.
19 #include "llvm/IR/GlobalValue.h"
20 #include "llvm/IR/Intrinsics.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/KnownBits.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "aarch64-isel"
30 
31 //===--------------------------------------------------------------------===//
32 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
33 /// instructions for SelectionDAG operations.
34 ///
35 namespace {
36 
37 class AArch64DAGToDAGISel : public SelectionDAGISel {
38 
39  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
40  /// make the right decision when generating code for different targets.
41  const AArch64Subtarget *Subtarget;
42 
43  bool ForCodeSize;
44 
45 public:
46  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
47  CodeGenOpt::Level OptLevel)
48  : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
49  ForCodeSize(false) {}
50 
51  StringRef getPassName() const override {
52  return "AArch64 Instruction Selection";
53  }
54 
55  bool runOnMachineFunction(MachineFunction &MF) override {
56  ForCodeSize = MF.getFunction().optForSize();
57  Subtarget = &MF.getSubtarget<AArch64Subtarget>();
 58  return SelectionDAGISel::runOnMachineFunction(MF);
 59  }
60 
61  void Select(SDNode *Node) override;
62 
63  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
64  /// inline asm expressions.
65  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
66  unsigned ConstraintID,
67  std::vector<SDValue> &OutOps) override;
68 
69  bool tryMLAV64LaneV128(SDNode *N);
70  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
71  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
72  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
74  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
75  return SelectShiftedRegister(N, false, Reg, Shift);
76  }
77  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
78  return SelectShiftedRegister(N, true, Reg, Shift);
79  }
80  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
81  return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
82  }
83  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
84  return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
85  }
86  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
87  return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
88  }
89  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
90  return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
91  }
92  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
93  return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
94  }
95  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
96  return SelectAddrModeIndexed(N, 1, Base, OffImm);
97  }
98  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
99  return SelectAddrModeIndexed(N, 2, Base, OffImm);
100  }
101  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
102  return SelectAddrModeIndexed(N, 4, Base, OffImm);
103  }
104  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
105  return SelectAddrModeIndexed(N, 8, Base, OffImm);
106  }
107  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
108  return SelectAddrModeIndexed(N, 16, Base, OffImm);
109  }
110  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
111  return SelectAddrModeUnscaled(N, 1, Base, OffImm);
112  }
113  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
114  return SelectAddrModeUnscaled(N, 2, Base, OffImm);
115  }
116  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
117  return SelectAddrModeUnscaled(N, 4, Base, OffImm);
118  }
119  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
120  return SelectAddrModeUnscaled(N, 8, Base, OffImm);
121  }
122  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
123  return SelectAddrModeUnscaled(N, 16, Base, OffImm);
124  }
125 
126  template<int Width>
127  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
128  SDValue &SignExtend, SDValue &DoShift) {
129  return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
130  }
131 
132  template<int Width>
133  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
134  SDValue &SignExtend, SDValue &DoShift) {
135  return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
136  }
137 
138 
139  /// Form sequences of consecutive 64/128-bit registers for use in NEON
140  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
 141  /// between 1 and 4 elements. If it contains a single element, that element
 142  /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
143  SDValue createDTuple(ArrayRef<SDValue> Vecs);
144  SDValue createQTuple(ArrayRef<SDValue> Vecs);
145 
146  /// Generic helper for the createDTuple/createQTuple
147  /// functions. Those should almost always be called instead.
148  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
149  const unsigned SubRegs[]);
150 
151  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
152 
153  bool tryIndexedLoad(SDNode *N);
154 
155  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
156  unsigned SubRegIdx);
157  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
158  unsigned SubRegIdx);
159  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
160  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
161 
162  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
163  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
164  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
165  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
166 
167  bool tryBitfieldExtractOp(SDNode *N);
168  bool tryBitfieldExtractOpFromSExt(SDNode *N);
169  bool tryBitfieldInsertOp(SDNode *N);
170  bool tryBitfieldInsertInZeroOp(SDNode *N);
171  bool tryShiftAmountMod(SDNode *N);
172 
173  bool tryReadRegister(SDNode *N);
174  bool tryWriteRegister(SDNode *N);
175 
176 // Include the pieces autogenerated from the target description.
177 #include "AArch64GenDAGISel.inc"
178 
179 private:
180  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
181  SDValue &Shift);
182  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
183  SDValue &OffImm);
184  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
185  SDValue &OffImm);
186  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
187  SDValue &OffImm);
188  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
189  SDValue &Offset, SDValue &SignExtend,
190  SDValue &DoShift);
191  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
192  SDValue &Offset, SDValue &SignExtend,
193  SDValue &DoShift);
194  bool isWorthFolding(SDValue V) const;
195  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
196  SDValue &Offset, SDValue &SignExtend);
197 
198  template<unsigned RegWidth>
199  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
200  return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
201  }
202 
203  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
204 
205  bool SelectCMP_SWAP(SDNode *N);
206 
207 };
208 } // end anonymous namespace
209 
210 /// isIntImmediate - This method tests to see if the node is a constant
 211 /// operand. If so, Imm will receive the immediate value.
212 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
213  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
214  Imm = C->getZExtValue();
215  return true;
216  }
217  return false;
218 }
219 
 220 // isIntImmediate - This method tests to see if the value is a constant
 221 // operand. If so, Imm will receive the value.
222 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
223  return isIntImmediate(N.getNode(), Imm);
224 }
225 
 226 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 227 // opcode and that it has an immediate integer right operand.
 228 // If so, Imm will receive the immediate value.
229 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
230  uint64_t &Imm) {
231  return N->getOpcode() == Opc &&
232  isIntImmediate(N->getOperand(1).getNode(), Imm);
233 }
234 
235 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
236  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
237  switch(ConstraintID) {
238  default:
 239  llvm_unreachable("Unexpected asm memory constraint");
 240  case InlineAsm::Constraint_i:
 241  case InlineAsm::Constraint_m:
 242  case InlineAsm::Constraint_Q:
 243  // We need to make sure that this one operand does not end up in XZR, thus
244  // require the address to be in a PointerRegClass register.
245  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
246  const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
247  SDLoc dl(Op);
248  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
249  SDValue NewOp =
250  SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
251  dl, Op.getValueType(),
252  Op, RC), 0);
253  OutOps.push_back(NewOp);
254  return false;
255  }
256  return true;
257 }
258 
259 /// SelectArithImmed - Select an immediate value that can be represented as
260 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
261 /// Val set to the 12-bit value and Shift set to the shifter operand.
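 /// For example, #0xabc can be encoded as Val=0xabc with LSL #0, and #0xabc000
 /// as Val=0xabc with LSL #12, while #0xabc00 cannot be selected this way.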
262 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
263  SDValue &Shift) {
264  // This function is called from the addsub_shifted_imm ComplexPattern,
 265  // which lists [imm] as the list of opcodes it is interested in; however,
266  // we still need to check whether the operand is actually an immediate
267  // here because the ComplexPattern opcode list is only used in
268  // root-level opcode matching.
269  if (!isa<ConstantSDNode>(N.getNode()))
270  return false;
271 
272  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
273  unsigned ShiftAmt;
274 
275  if (Immed >> 12 == 0) {
276  ShiftAmt = 0;
277  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
278  ShiftAmt = 12;
279  Immed = Immed >> 12;
280  } else
281  return false;
282 
283  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
284  SDLoc dl(N);
285  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
286  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
287  return true;
288 }
289 
290 /// SelectNegArithImmed - As above, but negates the value before trying to
291 /// select it.
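 /// For example, an add of -5 can instead be selected as a subtract of #5,
 /// since the negated value 5 fits in 12 bits.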
292 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
293  SDValue &Shift) {
294  // This function is called from the addsub_shifted_imm ComplexPattern,
 295  // which lists [imm] as the list of opcodes it is interested in; however,
296  // we still need to check whether the operand is actually an immediate
297  // here because the ComplexPattern opcode list is only used in
298  // root-level opcode matching.
299  if (!isa<ConstantSDNode>(N.getNode()))
300  return false;
301 
302  // The immediate operand must be a 24-bit zero-extended immediate.
303  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
304 
305  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
306  // have the opposite effect on the C flag, so this pattern mustn't match under
307  // those circumstances.
308  if (Immed == 0)
309  return false;
310 
311  if (N.getValueType() == MVT::i32)
312  Immed = ~((uint32_t)Immed) + 1;
313  else
314  Immed = ~Immed + 1ULL;
315  if (Immed & 0xFFFFFFFFFF000000ULL)
316  return false;
317 
318  Immed &= 0xFFFFFFULL;
319  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
320  Shift);
321 }
322 
323 /// getShiftTypeForNode - Translate a shift node to the corresponding
324 /// ShiftType value.
 325 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
 326  switch (N.getOpcode()) {
 327  default:
 328  return AArch64_AM::InvalidShiftExtend;
 329  case ISD::SHL:
330  return AArch64_AM::LSL;
331  case ISD::SRL:
332  return AArch64_AM::LSR;
333  case ISD::SRA:
334  return AArch64_AM::ASR;
335  case ISD::ROTR:
336  return AArch64_AM::ROR;
337  }
338 }
339 
340 /// Determine whether it is worth it to fold SHL into the addressing
341 /// mode.
342 static bool isWorthFoldingSHL(SDValue V) {
343  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
344  // It is worth folding logical shift of up to three places.
345  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
346  if (!CSD)
347  return false;
348  unsigned ShiftVal = CSD->getZExtValue();
349  if (ShiftVal > 3)
350  return false;
351 
352  // Check if this particular node is reused in any non-memory related
353  // operation. If yes, do not try to fold this node into the address
354  // computation, since the computation will be kept.
355  const SDNode *Node = V.getNode();
356  for (SDNode *UI : Node->uses())
357  if (!isa<MemSDNode>(*UI))
358  for (SDNode *UII : UI->uses())
359  if (!isa<MemSDNode>(*UII))
360  return false;
361  return true;
362 }
363 
 364 /// Determine whether it is worth folding V into an extended register.
365 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
366  // Trivial if we are optimizing for code size or if there is only
367  // one use of the value.
368  if (ForCodeSize || V.hasOneUse())
369  return true;
370  // If a subtarget has a fastpath LSL we can fold a logical shift into
371  // the addressing mode and save a cycle.
 372  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
 373  isWorthFoldingSHL(V))
 374  return true;
375  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
376  const SDValue LHS = V.getOperand(0);
377  const SDValue RHS = V.getOperand(1);
378  if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
379  return true;
380  if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
381  return true;
382  }
383 
384  // It hurts otherwise, since the value will be reused.
385  return false;
386 }
387 
388 /// SelectShiftedRegister - Select a "shifted register" operand. If the value
389 /// is not shifted, set the Shift operand to default of "LSL 0". The logical
390 /// instructions allow the shifted register to be rotated, but the arithmetic
391 /// instructions do not. The AllowROR parameter specifies whether ROR is
392 /// supported.
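 /// For example, (add x, (shl y, #3)) can fold the shift and select
 /// "add Xd, Xn, Xm, lsl #3".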
393 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
 394  SDValue &Reg, SDValue &Shift) {
 395  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
 396  if (ShType == AArch64_AM::InvalidShiftExtend)
397  return false;
398  if (!AllowROR && ShType == AArch64_AM::ROR)
399  return false;
400 
401  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
402  unsigned BitSize = N.getValueSizeInBits();
403  unsigned Val = RHS->getZExtValue() & (BitSize - 1);
404  unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
405 
406  Reg = N.getOperand(0);
407  Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
408  return isWorthFolding(N);
409  }
410 
411  return false;
412 }
413 
414 /// getExtendTypeForNode - Translate an extend node to the corresponding
415 /// ExtendType value.
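 /// For example, (and x, 0xff) maps to UXTB and (sext_inreg x, i16) maps to
 /// SXTH; unrecognized nodes map to InvalidShiftExtend.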
 416 static AArch64_AM::ShiftExtendType
 417 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
 418  if (N.getOpcode() == ISD::SIGN_EXTEND ||
 419  N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
 420  EVT SrcVT;
 421  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
 422  SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
 423  else
 424  SrcVT = N.getOperand(0).getValueType();
 425 
 426  if (!IsLoadStore && SrcVT == MVT::i8)
 427  return AArch64_AM::SXTB;
 428  else if (!IsLoadStore && SrcVT == MVT::i16)
 429  return AArch64_AM::SXTH;
 430  else if (SrcVT == MVT::i32)
 431  return AArch64_AM::SXTW;
 432  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
 433 
 434  return AArch64_AM::InvalidShiftExtend;
 435  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
 436  N.getOpcode() == ISD::ANY_EXTEND) {
 437  EVT SrcVT = N.getOperand(0).getValueType();
 438  if (!IsLoadStore && SrcVT == MVT::i8)
 439  return AArch64_AM::UXTB;
 440  else if (!IsLoadStore && SrcVT == MVT::i16)
 441  return AArch64_AM::UXTH;
 442  else if (SrcVT == MVT::i32)
 443  return AArch64_AM::UXTW;
 444  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
 445 
 446  return AArch64_AM::InvalidShiftExtend;
 447  } else if (N.getOpcode() == ISD::AND) {
 448  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
 449  if (!CSD)
 450  return AArch64_AM::InvalidShiftExtend;
 451  uint64_t AndMask = CSD->getZExtValue();
 452 
 453  switch (AndMask) {
 454  default:
 455  return AArch64_AM::InvalidShiftExtend;
 456  case 0xFF:
 457  return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
 458  case 0xFFFF:
 459  return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
 460  case 0xFFFFFFFF:
 461  return AArch64_AM::UXTW;
 462  }
 463  }
 464 
 465  return AArch64_AM::InvalidShiftExtend;
 466 }
467 
468 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
469 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
 470  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
 471  DL->getOpcode() != AArch64ISD::DUPLANE32)
 472  return false;
473 
474  SDValue SV = DL->getOperand(0);
475  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
476  return false;
477 
478  SDValue EV = SV.getOperand(1);
479  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
480  return false;
481 
482  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
483  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
484  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
485  LaneOp = EV.getOperand(0);
486 
487  return true;
488 }
489 
490 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
491 // high lane extract.
492 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
493  SDValue &LaneOp, int &LaneIdx) {
494 
495  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
496  std::swap(Op0, Op1);
497  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
498  return false;
499  }
500  StdOp = Op1;
501  return true;
502 }
503 
504 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
505 /// is a lane in the upper half of a 128-bit vector. Recognize and select this
506 /// so that we don't emit unnecessary lane extracts.
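 /// For example, an add+mul whose multiplicand is a duplicated high lane of a
 /// v8i16 value can become MLAv4i16_indexed, indexing the 128-bit register
 /// directly instead of extracting the lane first.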
507 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
508  SDLoc dl(N);
509  SDValue Op0 = N->getOperand(0);
510  SDValue Op1 = N->getOperand(1);
511  SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
512  SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
513  int LaneIdx = -1; // Will hold the lane index.
514 
515  if (Op1.getOpcode() != ISD::MUL ||
516  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
517  LaneIdx)) {
518  std::swap(Op0, Op1);
519  if (Op1.getOpcode() != ISD::MUL ||
520  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
521  LaneIdx))
522  return false;
523  }
524 
525  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
526 
527  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
528 
529  unsigned MLAOpc = ~0U;
530 
531  switch (N->getSimpleValueType(0).SimpleTy) {
532  default:
533  llvm_unreachable("Unrecognized MLA.");
534  case MVT::v4i16:
535  MLAOpc = AArch64::MLAv4i16_indexed;
536  break;
537  case MVT::v8i16:
538  MLAOpc = AArch64::MLAv8i16_indexed;
539  break;
540  case MVT::v2i32:
541  MLAOpc = AArch64::MLAv2i32_indexed;
542  break;
543  case MVT::v4i32:
544  MLAOpc = AArch64::MLAv4i32_indexed;
545  break;
546  }
547 
548  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
549  return true;
550 }
551 
552 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
553  SDLoc dl(N);
554  SDValue SMULLOp0;
555  SDValue SMULLOp1;
556  int LaneIdx;
557 
558  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
559  LaneIdx))
560  return false;
561 
562  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
563 
564  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
565 
566  unsigned SMULLOpc = ~0U;
567 
568  if (IntNo == Intrinsic::aarch64_neon_smull) {
569  switch (N->getSimpleValueType(0).SimpleTy) {
570  default:
571  llvm_unreachable("Unrecognized SMULL.");
572  case MVT::v4i32:
573  SMULLOpc = AArch64::SMULLv4i16_indexed;
574  break;
575  case MVT::v2i64:
576  SMULLOpc = AArch64::SMULLv2i32_indexed;
577  break;
578  }
579  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
580  switch (N->getSimpleValueType(0).SimpleTy) {
581  default:
582  llvm_unreachable("Unrecognized SMULL.");
583  case MVT::v4i32:
584  SMULLOpc = AArch64::UMULLv4i16_indexed;
585  break;
586  case MVT::v2i64:
587  SMULLOpc = AArch64::UMULLv2i32_indexed;
588  break;
589  }
590  } else
591  llvm_unreachable("Unrecognized intrinsic.");
592 
593  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
594  return true;
595 }
596 
597 /// Instructions that accept extend modifiers like UXTW expect the register
598 /// being extended to be a GPR32, but the incoming DAG might be acting on a
599 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
600 /// this is the case.
 601 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
 602  if (N.getValueType() == MVT::i32)
603  return N;
604 
605  SDLoc dl(N);
606  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
607  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
608  dl, MVT::i32, N, SubReg);
609  return SDValue(Node, 0);
610 }
611 
612 
613 /// SelectArithExtendedRegister - Select a "extended register" operand. This
614 /// operand folds in an extend followed by an optional left shift.
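 /// For example, (add x, (shl (sext_inreg y, i8), 2)) can be selected as
 /// "add Xd, Xn, Wm, sxtb #2".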
615 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
616  SDValue &Shift) {
 617  unsigned ShiftVal = 0;
 618  AArch64_AM::ShiftExtendType Ext;
 619 
 620  if (N.getOpcode() == ISD::SHL) {
 621  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
 622  if (!CSD)
 623  return false;
 624  ShiftVal = CSD->getZExtValue();
 625  if (ShiftVal > 4)
 626  return false;
 627 
 628  Ext = getExtendTypeForNode(N.getOperand(0));
 629  if (Ext == AArch64_AM::InvalidShiftExtend)
 630  return false;
 631 
 632  Reg = N.getOperand(0).getOperand(0);
 633  } else {
 634  Ext = getExtendTypeForNode(N);
 635  if (Ext == AArch64_AM::InvalidShiftExtend)
 636  return false;
 637 
 638  Reg = N.getOperand(0);
639 
640  // Don't match if free 32-bit -> 64-bit zext can be used instead.
641  if (Ext == AArch64_AM::UXTW &&
642  Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
643  return false;
644  }
645 
646  // AArch64 mandates that the RHS of the operation must use the smallest
647  // register class that could contain the size being extended from. Thus,
648  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
649  // there might not be an actual 32-bit value in the program. We can
 650  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
651  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
652  Reg = narrowIfNeeded(CurDAG, Reg);
653  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
654  MVT::i32);
655  return isWorthFolding(N);
656 }
657 
658 /// If there's a use of this ADDlow that's not itself a load/store then we'll
659 /// need to create a real ADD instruction from it anyway and there's no point in
660 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
661 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
662 /// leads to duplicated ADRP instructions.
 663 static bool isWorthFoldingADDlow(SDValue N) {
 664  for (auto Use : N->uses()) {
665  if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
666  Use->getOpcode() != ISD::ATOMIC_LOAD &&
667  Use->getOpcode() != ISD::ATOMIC_STORE)
668  return false;
669 
670  // ldar and stlr have much more restrictive addressing modes (just a
671  // register).
672  if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
673  return false;
674  }
675 
676  return true;
677 }
678 
679 /// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit
680 /// immediate" address. The "Size" argument is the size in bytes of the memory
681 /// reference, which determines the scale.
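 /// For example, with Size == 8 this accepts byte offsets that are multiples
 /// of 8 in [-512, 504], the range of a 64-bit LDP/STP immediate.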
682 bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
683  SDValue &Base,
684  SDValue &OffImm) {
685  SDLoc dl(N);
686  const DataLayout &DL = CurDAG->getDataLayout();
687  const TargetLowering *TLI = getTargetLowering();
688  if (N.getOpcode() == ISD::FrameIndex) {
689  int FI = cast<FrameIndexSDNode>(N)->getIndex();
690  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
691  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
692  return true;
693  }
694 
695  // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
 696  // mode selected here doesn't support labels/immediates, only base+offset.
697 
698  if (CurDAG->isBaseWithConstantOffset(N)) {
699  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
700  int64_t RHSC = RHS->getSExtValue();
701  unsigned Scale = Log2_32(Size);
702  if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&
703  RHSC < (0x40 << Scale)) {
704  Base = N.getOperand(0);
705  if (Base.getOpcode() == ISD::FrameIndex) {
706  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
707  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
708  }
709  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
710  return true;
711  }
712  }
713  }
714 
715  // Base only. The address will be materialized into a register before
716  // the memory is accessed.
717  // add x0, Xbase, #offset
718  // stp x1, x2, [x0]
719  Base = N;
720  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
721  return true;
722 }
723 
724 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
725 /// immediate" address. The "Size" argument is the size in bytes of the memory
726 /// reference, which determines the scale.
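 /// For example, with Size == 4 this accepts byte offsets that are multiples
 /// of 4 in [0, 16380], matching the unsigned scaled form "ldr w0, [x1, #imm]".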
727 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
728  SDValue &Base, SDValue &OffImm) {
729  SDLoc dl(N);
730  const DataLayout &DL = CurDAG->getDataLayout();
731  const TargetLowering *TLI = getTargetLowering();
732  if (N.getOpcode() == ISD::FrameIndex) {
733  int FI = cast<FrameIndexSDNode>(N)->getIndex();
734  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
735  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
736  return true;
737  }
738 
 739  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
 740  GlobalAddressSDNode *GAN =
 741  dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
 742  Base = N.getOperand(0);
743  OffImm = N.getOperand(1);
744  if (!GAN)
745  return true;
746 
747  if (GAN->getOffset() % Size == 0) {
748  const GlobalValue *GV = GAN->getGlobal();
749  unsigned Alignment = GV->getAlignment();
750  Type *Ty = GV->getValueType();
751  if (Alignment == 0 && Ty->isSized())
752  Alignment = DL.getABITypeAlignment(Ty);
753 
754  if (Alignment >= Size)
755  return true;
756  }
757  }
758 
759  if (CurDAG->isBaseWithConstantOffset(N)) {
760  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
761  int64_t RHSC = (int64_t)RHS->getZExtValue();
762  unsigned Scale = Log2_32(Size);
763  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
764  Base = N.getOperand(0);
765  if (Base.getOpcode() == ISD::FrameIndex) {
766  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
767  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
768  }
769  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
770  return true;
771  }
772  }
773  }
774 
775  // Before falling back to our general case, check if the unscaled
776  // instructions can handle this. If so, that's preferable.
777  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
778  return false;
779 
780  // Base only. The address will be materialized into a register before
781  // the memory is accessed.
782  // add x0, Xbase, #offset
783  // ldr x0, [x0]
784  Base = N;
785  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
786  return true;
787 }
788 
789 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
790 /// immediate" address. This should only match when there is an offset that
791 /// is not valid for a scaled immediate addressing mode. The "Size" argument
792 /// is the size in bytes of the memory reference, which is needed here to know
793 /// what is valid for a scaled immediate.
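 /// For example, an offset of -3 on a 32-bit access is not a legal scaled
 /// immediate but fits the [-256, 255] range of LDUR/STUR.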
794 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
795  SDValue &Base,
796  SDValue &OffImm) {
797  if (!CurDAG->isBaseWithConstantOffset(N))
798  return false;
799  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
800  int64_t RHSC = RHS->getSExtValue();
801  // If the offset is valid as a scaled immediate, don't match here.
802  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
803  RHSC < (0x1000 << Log2_32(Size)))
804  return false;
805  if (RHSC >= -256 && RHSC < 256) {
806  Base = N.getOperand(0);
807  if (Base.getOpcode() == ISD::FrameIndex) {
808  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
809  const TargetLowering *TLI = getTargetLowering();
810  Base = CurDAG->getTargetFrameIndex(
811  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
812  }
813  OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
814  return true;
815  }
816  }
817  return false;
818 }
819 
820 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
821  SDLoc dl(N);
822  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
823  SDValue ImpDef = SDValue(
824  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
825  MachineSDNode *Node = CurDAG->getMachineNode(
826  TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
827  return SDValue(Node, 0);
828 }
829 
830 /// Check if the given SHL node (\p N), can be used to form an
831 /// extended register for an addressing mode.
832 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
833  bool WantExtend, SDValue &Offset,
834  SDValue &SignExtend) {
835  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
 836  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
 837  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
838  return false;
839 
840  SDLoc dl(N);
841  if (WantExtend) {
 842  AArch64_AM::ShiftExtendType Ext =
 843  getExtendTypeForNode(N.getOperand(0), true);
 844  if (Ext == AArch64_AM::InvalidShiftExtend)
 845  return false;
846 
847  Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
848  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
849  MVT::i32);
850  } else {
851  Offset = N.getOperand(0);
852  SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
853  }
854 
855  unsigned LegalShiftVal = Log2_32(Size);
856  unsigned ShiftVal = CSD->getZExtValue();
857 
858  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
859  return false;
860 
861  return isWorthFolding(N);
862 }
863 
864 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
865  SDValue &Base, SDValue &Offset,
866  SDValue &SignExtend,
867  SDValue &DoShift) {
868  if (N.getOpcode() != ISD::ADD)
869  return false;
870  SDValue LHS = N.getOperand(0);
871  SDValue RHS = N.getOperand(1);
872  SDLoc dl(N);
873 
874  // We don't want to match immediate adds here, because they are better lowered
875  // to the register-immediate addressing modes.
876  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
877  return false;
878 
879  // Check if this particular node is reused in any non-memory related
880  // operation. If yes, do not try to fold this node into the address
881  // computation, since the computation will be kept.
882  const SDNode *Node = N.getNode();
883  for (SDNode *UI : Node->uses()) {
884  if (!isa<MemSDNode>(*UI))
885  return false;
886  }
887 
888  // Remember if it is worth folding N when it produces extended register.
889  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
890 
891  // Try to match a shifted extend on the RHS.
892  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
893  SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
894  Base = LHS;
895  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
896  return true;
897  }
898 
899  // Try to match a shifted extend on the LHS.
900  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
901  SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
902  Base = RHS;
903  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
904  return true;
905  }
906 
907  // There was no shift, whatever else we find.
908  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
909 
 910  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
 911  // Try to match an unshifted extend on the LHS.
 912  if (IsExtendedRegisterWorthFolding &&
 913  (Ext = getExtendTypeForNode(LHS, true)) !=
 914  AArch64_AM::InvalidShiftExtend) {
 915  Base = RHS;
916  Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
917  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
918  MVT::i32);
919  if (isWorthFolding(LHS))
920  return true;
921  }
922 
923  // Try to match an unshifted extend on the RHS.
924  if (IsExtendedRegisterWorthFolding &&
 925  (Ext = getExtendTypeForNode(RHS, true)) !=
 926  AArch64_AM::InvalidShiftExtend) {
 927  Base = LHS;
928  Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
929  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
930  MVT::i32);
931  if (isWorthFolding(RHS))
932  return true;
933  }
934 
935  return false;
936 }
937 
938 // Check if the given immediate is preferred by ADD. If an immediate can be
 939 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
940 // encoded by one MOVZ, return true.
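 // For example, 0x123 (a single ADD) and 0x123000 (ADD LSL #12) are preferred,
 // while 0x5000 is not, because a single MOVZ already materializes it.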
941 static bool isPreferredADD(int64_t ImmOff) {
942  // Constant in [0x0, 0xfff] can be encoded in ADD.
943  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
944  return true;
945  // Check if it can be encoded in an "ADD LSL #12".
946  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
 947  // As a single MOVZ is faster than an "ADD LSL #12", ignore such a constant.
948  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
949  (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
950  return false;
951 }
952 
953 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
954  SDValue &Base, SDValue &Offset,
955  SDValue &SignExtend,
956  SDValue &DoShift) {
957  if (N.getOpcode() != ISD::ADD)
958  return false;
959  SDValue LHS = N.getOperand(0);
960  SDValue RHS = N.getOperand(1);
961  SDLoc DL(N);
962 
963  // Check if this particular node is reused in any non-memory related
964  // operation. If yes, do not try to fold this node into the address
965  // computation, since the computation will be kept.
966  const SDNode *Node = N.getNode();
967  for (SDNode *UI : Node->uses()) {
968  if (!isa<MemSDNode>(*UI))
969  return false;
970  }
971 
 972  // Watch out if RHS is a wide immediate: it cannot be selected into the
 973  // [BaseReg+Imm] addressing mode, and it may not be encodable in ADD/SUB
 974  // either. Instead, [BaseReg + 0] addressing would be used, generating
975  // instructions like:
976  // MOV X0, WideImmediate
977  // ADD X1, BaseReg, X0
978  // LDR X2, [X1, 0]
979  // For such situation, using [BaseReg, XReg] addressing mode can save one
980  // ADD/SUB:
981  // MOV X0, WideImmediate
982  // LDR X2, [BaseReg, X0]
983  if (isa<ConstantSDNode>(RHS)) {
984  int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
985  unsigned Scale = Log2_32(Size);
 986  // Skip if the immediate can be selected by the load/store addressing mode.
 987  // Also skip if the immediate can be encoded by a single ADD (SUB is also
 988  // checked by using -ImmOff).
989  if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
990  isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
991  return false;
992 
993  SDValue Ops[] = { RHS };
994  SDNode *MOVI =
995  CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
996  SDValue MOVIV = SDValue(MOVI, 0);
 997  // This ADD of two X registers will be selected into [Reg+Reg] mode.
998  N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
999  }
1000 
1001  // Remember if it is worth folding N when it produces extended register.
1002  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1003 
1004  // Try to match a shifted extend on the RHS.
1005  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1006  SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1007  Base = LHS;
1008  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1009  return true;
1010  }
1011 
1012  // Try to match a shifted extend on the LHS.
1013  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1014  SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1015  Base = RHS;
1016  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1017  return true;
1018  }
1019 
1020  // Match any non-shifted, non-extend, non-immediate add expression.
1021  Base = LHS;
1022  Offset = RHS;
1023  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1024  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1025  // Reg1 + Reg2 is free: no check needed.
1026  return true;
1027 }
1028 
1029 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1030  static const unsigned RegClassIDs[] = {
1031  AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1032  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1033  AArch64::dsub2, AArch64::dsub3};
1034 
1035  return createTuple(Regs, RegClassIDs, SubRegs);
1036 }
1037 
1038 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1039  static const unsigned RegClassIDs[] = {
1040  AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1041  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1042  AArch64::qsub2, AArch64::qsub3};
1043 
1044  return createTuple(Regs, RegClassIDs, SubRegs);
1045 }
1046 
1047 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1048  const unsigned RegClassIDs[],
1049  const unsigned SubRegs[]) {
1050  // There's no special register-class for a vector-list of 1 element: it's just
1051  // a vector.
1052  if (Regs.size() == 1)
1053  return Regs[0];
1054 
1055  assert(Regs.size() >= 2 && Regs.size() <= 4);
1056 
1057  SDLoc DL(Regs[0]);
 1058 
 1059  SmallVector<SDValue, 4> Ops;
 1060 
1061  // First operand of REG_SEQUENCE is the desired RegClass.
1062  Ops.push_back(
1063  CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1064 
1065  // Then we get pairs of source & subregister-position for the components.
1066  for (unsigned i = 0; i < Regs.size(); ++i) {
1067  Ops.push_back(Regs[i]);
1068  Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1069  }
1070 
1071  SDNode *N =
1072  CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1073  return SDValue(N, 0);
1074 }
1075 
1076 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1077  bool isExt) {
1078  SDLoc dl(N);
1079  EVT VT = N->getValueType(0);
1080 
1081  unsigned ExtOff = isExt;
1082 
1083  // Form a REG_SEQUENCE to force register allocation.
1084  unsigned Vec0Off = ExtOff + 1;
1085  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1086  N->op_begin() + Vec0Off + NumVecs);
1087  SDValue RegSeq = createQTuple(Regs);
1088 
 1089  SmallVector<SDValue, 6> Ops;
 1090  if (isExt)
1091  Ops.push_back(N->getOperand(1));
1092  Ops.push_back(RegSeq);
1093  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1094  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1095 }
1096 
1097 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1098  LoadSDNode *LD = cast<LoadSDNode>(N);
1099  if (LD->isUnindexed())
1100  return false;
1101  EVT VT = LD->getMemoryVT();
1102  EVT DstVT = N->getValueType(0);
 1103  ISD::MemIndexedMode AM = LD->getAddressingMode();
 1104  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1105 
1106  // We're not doing validity checking here. That was done when checking
1107  // if we should mark the load as indexed or not. We're just selecting
1108  // the right instruction.
1109  unsigned Opcode = 0;
1110 
 1110 
 1111  ISD::LoadExtType ExtType = LD->getExtensionType();
 1112  bool InsertTo64 = false;
1113  if (VT == MVT::i64)
1114  Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1115  else if (VT == MVT::i32) {
1116  if (ExtType == ISD::NON_EXTLOAD)
1117  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1118  else if (ExtType == ISD::SEXTLOAD)
1119  Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1120  else {
1121  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1122  InsertTo64 = true;
1123  // The result of the load is only i32. It's the subreg_to_reg that makes
1124  // it into an i64.
1125  DstVT = MVT::i32;
1126  }
1127  } else if (VT == MVT::i16) {
1128  if (ExtType == ISD::SEXTLOAD) {
1129  if (DstVT == MVT::i64)
1130  Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1131  else
1132  Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1133  } else {
1134  Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1135  InsertTo64 = DstVT == MVT::i64;
1136  // The result of the load is only i32. It's the subreg_to_reg that makes
1137  // it into an i64.
1138  DstVT = MVT::i32;
1139  }
1140  } else if (VT == MVT::i8) {
1141  if (ExtType == ISD::SEXTLOAD) {
1142  if (DstVT == MVT::i64)
1143  Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1144  else
1145  Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1146  } else {
1147  Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1148  InsertTo64 = DstVT == MVT::i64;
1149  // The result of the load is only i32. It's the subreg_to_reg that makes
1150  // it into an i64.
1151  DstVT = MVT::i32;
1152  }
1153  } else if (VT == MVT::f16) {
1154  Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1155  } else if (VT == MVT::f32) {
1156  Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1157  } else if (VT == MVT::f64 || VT.is64BitVector()) {
1158  Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1159  } else if (VT.is128BitVector()) {
1160  Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1161  } else
1162  return false;
1163  SDValue Chain = LD->getChain();
1164  SDValue Base = LD->getBasePtr();
1165  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1166  int OffsetVal = (int)OffsetOp->getZExtValue();
1167  SDLoc dl(N);
1168  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1169  SDValue Ops[] = { Base, Offset, Chain };
1170  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1171  MVT::Other, Ops);
1172  // Either way, we're replacing the node, so tell the caller that.
1173  SDValue LoadedVal = SDValue(Res, 1);
1174  if (InsertTo64) {
1175  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1176  LoadedVal =
1177  SDValue(CurDAG->getMachineNode(
1178  AArch64::SUBREG_TO_REG, dl, MVT::i64,
1179  CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1180  SubReg),
1181  0);
1182  }
1183 
1184  ReplaceUses(SDValue(N, 0), LoadedVal);
1185  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1186  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1187  CurDAG->RemoveDeadNode(N);
1188  return true;
1189 }
1190 
1191 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1192  unsigned SubRegIdx) {
1193  SDLoc dl(N);
1194  EVT VT = N->getValueType(0);
1195  SDValue Chain = N->getOperand(0);
1196 
1197  SDValue Ops[] = {N->getOperand(2), // Mem operand;
1198  Chain};
1199 
1200  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1201 
1202  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1203  SDValue SuperReg = SDValue(Ld, 0);
1204  for (unsigned i = 0; i < NumVecs; ++i)
1205  ReplaceUses(SDValue(N, i),
1206  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1207 
1208  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1209 
1210  // Transfer memoperands.
1211  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1212  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1213 
1214  CurDAG->RemoveDeadNode(N);
1215 }
1216 
1217 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1218  unsigned Opc, unsigned SubRegIdx) {
1219  SDLoc dl(N);
1220  EVT VT = N->getValueType(0);
1221  SDValue Chain = N->getOperand(0);
1222 
1223  SDValue Ops[] = {N->getOperand(1), // Mem operand
1224  N->getOperand(2), // Incremental
1225  Chain};
1226 
1227  const EVT ResTys[] = {MVT::i64, // Type of the write back register
 1228  MVT::Untyped, MVT::Other};
 1229 
1230  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1231 
1232  // Update uses of write back register
1233  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1234 
1235  // Update uses of vector list
1236  SDValue SuperReg = SDValue(Ld, 1);
1237  if (NumVecs == 1)
1238  ReplaceUses(SDValue(N, 0), SuperReg);
1239  else
1240  for (unsigned i = 0; i < NumVecs; ++i)
1241  ReplaceUses(SDValue(N, i),
1242  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1243 
1244  // Update the chain
1245  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1246  CurDAG->RemoveDeadNode(N);
1247 }
1248 
1249 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1250  unsigned Opc) {
1251  SDLoc dl(N);
1252  EVT VT = N->getOperand(2)->getValueType(0);
1253 
1254  // Form a REG_SEQUENCE to force register allocation.
1255  bool Is128Bit = VT.getSizeInBits() == 128;
1256  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1257  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1258 
1259  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1260  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1261 
1262  // Transfer memoperands.
1263  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1264  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1265 
1266  ReplaceNode(N, St);
1267 }
1268 
1269 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1270  unsigned Opc) {
1271  SDLoc dl(N);
1272  EVT VT = N->getOperand(2)->getValueType(0);
1273  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1274  MVT::Other}; // Type for the Chain
1275 
1276  // Form a REG_SEQUENCE to force register allocation.
1277  bool Is128Bit = VT.getSizeInBits() == 128;
1278  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1279  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1280 
1281  SDValue Ops[] = {RegSeq,
1282  N->getOperand(NumVecs + 1), // base register
1283  N->getOperand(NumVecs + 2), // Incremental
1284  N->getOperand(0)}; // Chain
1285  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1286 
1287  ReplaceNode(N, St);
1288 }
1289 
1290 namespace {
1291 /// WidenVector - Given a value in the V64 register class, produce the
1292 /// equivalent value in the V128 register class.
1293 class WidenVector {
1294  SelectionDAG &DAG;
1295 
1296 public:
1297  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1298 
1299  SDValue operator()(SDValue V64Reg) {
1300  EVT VT = V64Reg.getValueType();
1301  unsigned NarrowSize = VT.getVectorNumElements();
1302  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1303  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1304  SDLoc DL(V64Reg);
1305 
1306  SDValue Undef =
1307  SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1308  return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1309  }
1310 };
1311 } // namespace
1312 
1313 /// NarrowVector - Given a value in the V128 register class, produce the
1314 /// equivalent value in the V64 register class.
1315 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1316  EVT VT = V128Reg.getValueType();
1317  unsigned WideSize = VT.getVectorNumElements();
1318  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1319  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1320 
1321  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1322  V128Reg);
1323 }
1324 
1325 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1326  unsigned Opc) {
1327  SDLoc dl(N);
1328  EVT VT = N->getValueType(0);
1329  bool Narrow = VT.getSizeInBits() == 64;
1330 
1331  // Form a REG_SEQUENCE to force register allocation.
1332  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1333 
1334  if (Narrow)
1335  transform(Regs, Regs.begin(),
1336  WidenVector(*CurDAG));
1337 
1338  SDValue RegSeq = createQTuple(Regs);
1339 
1340  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1341 
1342  unsigned LaneNo =
1343  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1344 
1345  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1346  N->getOperand(NumVecs + 3), N->getOperand(0)};
1347  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1348  SDValue SuperReg = SDValue(Ld, 0);
1349 
1350  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1351  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1352  AArch64::qsub2, AArch64::qsub3 };
1353  for (unsigned i = 0; i < NumVecs; ++i) {
1354  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1355  if (Narrow)
1356  NV = NarrowVector(NV, *CurDAG);
1357  ReplaceUses(SDValue(N, i), NV);
1358  }
1359 
1360  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1361  CurDAG->RemoveDeadNode(N);
1362 }
1363 
1364 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1365  unsigned Opc) {
1366  SDLoc dl(N);
1367  EVT VT = N->getValueType(0);
1368  bool Narrow = VT.getSizeInBits() == 64;
1369 
1370  // Form a REG_SEQUENCE to force register allocation.
1371  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1372 
1373  if (Narrow)
1374  transform(Regs, Regs.begin(),
1375  WidenVector(*CurDAG));
1376 
1377  SDValue RegSeq = createQTuple(Regs);
1378 
1379  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1380  RegSeq->getValueType(0), MVT::Other};
1381 
1382  unsigned LaneNo =
1383  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1384 
1385  SDValue Ops[] = {RegSeq,
1386  CurDAG->getTargetConstant(LaneNo, dl,
1387  MVT::i64), // Lane Number
1388  N->getOperand(NumVecs + 2), // Base register
1389  N->getOperand(NumVecs + 3), // Incremental
1390  N->getOperand(0)};
1391  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1392 
1393  // Update uses of the write back register
1394  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1395 
1396  // Update uses of the vector list
1397  SDValue SuperReg = SDValue(Ld, 1);
1398  if (NumVecs == 1) {
1399  ReplaceUses(SDValue(N, 0),
1400  Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1401  } else {
1402  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1403  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1404  AArch64::qsub2, AArch64::qsub3 };
1405  for (unsigned i = 0; i < NumVecs; ++i) {
1406  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1407  SuperReg);
1408  if (Narrow)
1409  NV = NarrowVector(NV, *CurDAG);
1410  ReplaceUses(SDValue(N, i), NV);
1411  }
1412  }
1413 
1414  // Update the Chain
1415  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1416  CurDAG->RemoveDeadNode(N);
1417 }
1418 
1419 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1420  unsigned Opc) {
1421  SDLoc dl(N);
1422  EVT VT = N->getOperand(2)->getValueType(0);
1423  bool Narrow = VT.getSizeInBits() == 64;
1424 
1425  // Form a REG_SEQUENCE to force register allocation.
1426  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1427 
1428  if (Narrow)
1429  transform(Regs, Regs.begin(),
1430  WidenVector(*CurDAG));
1431 
1432  SDValue RegSeq = createQTuple(Regs);
1433 
1434  unsigned LaneNo =
1435  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1436 
1437  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1438  N->getOperand(NumVecs + 3), N->getOperand(0)};
1439  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1440 
1441  // Transfer memoperands.
1442  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1443  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1444 
1445  ReplaceNode(N, St);
1446 }
1447 
1448 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1449  unsigned Opc) {
1450  SDLoc dl(N);
1451  EVT VT = N->getOperand(2)->getValueType(0);
1452  bool Narrow = VT.getSizeInBits() == 64;
1453 
1454  // Form a REG_SEQUENCE to force register allocation.
1455  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1456 
1457  if (Narrow)
1458  transform(Regs, Regs.begin(),
1459  WidenVector(*CurDAG));
1460 
1461  SDValue RegSeq = createQTuple(Regs);
1462 
1463  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1464  MVT::Other};
1465 
1466  unsigned LaneNo =
1467  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1468 
1469  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1470  N->getOperand(NumVecs + 2), // Base Register
1471  N->getOperand(NumVecs + 3), // Incremental
1472  N->getOperand(0)};
1473  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1474 
1475  // Transfer memoperands.
1476  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1477  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1478 
1479  ReplaceNode(N, St);
1480 }
1481 
 1482 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
 1483  unsigned &Opc, SDValue &Opd0,
1484  unsigned &LSB, unsigned &MSB,
1485  unsigned NumberOfIgnoredLowBits,
1486  bool BiggerPattern) {
1487  assert(N->getOpcode() == ISD::AND &&
1488  "N must be a AND operation to call this function");
1489 
1490  EVT VT = N->getValueType(0);
1491 
1492  // Here we can test the type of VT and return false when the type does not
1493  // match, but since it is done prior to that call in the current context
1494  // we turned that into an assert to avoid redundant code.
1495  assert((VT == MVT::i32 || VT == MVT::i64) &&
1496  "Type checking must have been done before calling this function");
1497 
1498  // FIXME: simplify-demanded-bits in DAGCombine will probably have
1499  // changed the AND node to a 32-bit mask operation. We'll have to
1500  // undo that as part of the transform here if we want to catch all
1501  // the opportunities.
1502  // Currently the NumberOfIgnoredLowBits argument helps to recover
1503  // form these situations when matching bigger pattern (bitfield insert).
1504 
1505  // For unsigned extracts, check for a shift right and mask
1506  uint64_t AndImm = 0;
1507  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
1508  return false;
1509 
1510  const SDNode *Op0 = N->getOperand(0).getNode();
1511 
1512  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1513  // simplified. Try to undo that
1514  AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
1515 
1516  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
1517  if (AndImm & (AndImm + 1))
1518  return false;
1519 
1520  bool ClampMSB = false;
1521  uint64_t SrlImm = 0;
1522  // Handle the SRL + ANY_EXTEND case.
1523  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1524  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
1525  // Extend the incoming operand of the SRL to 64-bit.
1526  Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1527  // Make sure to clamp the MSB so that we preserve the semantics of the
1528  // original operations.
1529  ClampMSB = true;
1530  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
 1531  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
 1532  SrlImm)) {
1533  // If the shift result was truncated, we can still combine them.
1534  Opd0 = Op0->getOperand(0).getOperand(0);
1535 
1536  // Use the type of SRL node.
1537  VT = Opd0->getValueType(0);
1538  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
1539  Opd0 = Op0->getOperand(0);
1540  } else if (BiggerPattern) {
1541  // Let's pretend a 0 shift right has been performed.
1542  // The resulting code will be at least as good as the original one
1543  // plus it may expose more opportunities for bitfield insert pattern.
1544  // FIXME: Currently we limit this to the bigger pattern, because
1545  // some optimizations expect AND and not UBFM.
1546  Opd0 = N->getOperand(0);
1547  } else
1548  return false;
1549 
1550  // Bail out on large immediates. This happens when no proper
1551  // combining/constant folding was performed.
1552  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
1553  LLVM_DEBUG(
1554  (dbgs() << N
1555  << ": Found large shift immediate, this should not happen\n"));
1556  return false;
1557  }
1558 
1559  LSB = SrlImm;
1560  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
1561  : countTrailingOnes<uint64_t>(AndImm)) -
1562  1;
1563  if (ClampMSB)
1564  // Since we're moving the extend before the right shift operation, we need
1565  // to clamp the MSB to make sure we don't shift in undefined bits instead of
1566  // the zeros which would get shifted in with the original right shift
1567  // operation.
1568  MSB = MSB > 31 ? 31 : MSB;
1569 
1570  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1571  return true;
1572 }
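// A minimal, compile-checkable sketch (not part of this file) of the UBFM
// operands the routine above would compute for (and (srl x, 3), 0x1f) on i32,
// assuming no other combine has reshaped the mask. Identifiers are illustrative.
namespace sketch_ubfx {
constexpr unsigned SrlImm = 3;                   // shift feeding the AND
constexpr unsigned MaskOnes = 5;                 // countTrailingOnes(0x1f)
constexpr unsigned LSB = SrlImm;                 // becomes Immr
constexpr unsigned MSB = SrlImm + MaskOnes - 1;  // becomes Imms
static_assert(MSB == 7, "UBFMWri x, #3, #7 is the alias UBFX w0, w1, #3, #5");
} // namespace sketch_ubfx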
1573 
1574 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
1575  SDValue &Opd0, unsigned &Immr,
1576  unsigned &Imms) {
1577  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
1578 
1579  EVT VT = N->getValueType(0);
1580  unsigned BitWidth = VT.getSizeInBits();
1581  assert((VT == MVT::i32 || VT == MVT::i64) &&
1582  "Type checking must have been done before calling this function");
1583 
1584  SDValue Op = N->getOperand(0);
1585  if (Op->getOpcode() == ISD::TRUNCATE) {
1586  Op = Op->getOperand(0);
1587  VT = Op->getValueType(0);
1588  BitWidth = VT.getSizeInBits();
1589  }
1590 
1591  uint64_t ShiftImm;
1592  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
1593  !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1594  return false;
1595 
1596  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1597  if (ShiftImm + Width > BitWidth)
1598  return false;
1599 
1600  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
1601  Opd0 = Op.getOperand(0);
1602  Immr = ShiftImm;
1603  Imms = ShiftImm + Width - 1;
1604  return true;
1605 }
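// A small sketch (not part of this file) of the SBFM operands derived above
// for (sign_extend_inreg (srl x, 4), i8) on i32; identifiers are illustrative.
namespace sketch_sbfx {
constexpr unsigned ShiftImm = 4;                 // the SRL amount
constexpr unsigned Width = 8;                    // width of the i8 being sign-extended
constexpr unsigned Immr = ShiftImm;
constexpr unsigned Imms = ShiftImm + Width - 1;
static_assert(Imms == 11, "SBFMWri x, #4, #11 is the alias SBFX w0, w1, #4, #8");
} // namespace sketch_sbfx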
1606 
1607 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
1608  SDValue &Opd0, unsigned &LSB,
1609  unsigned &MSB) {
1610  // We are looking for the following pattern which basically extracts several
1611  // contiguous bits from the source value and places them at the LSB of the
1612  // destination value; all other bits of the destination value are set to zero:
1613  //
1614  // Value2 = AND Value, MaskImm
1615  // SRL Value2, ShiftImm
1616  //
1617  // where the trailing ones in MaskImm >> ShiftImm give the extracted bit width.
1618  //
1619  // This gets selected into a single UBFM:
1620  //
1621  // UBFM Value, ShiftImm, BitWide + SrlImm - 1
1622  //
1623 
1624  if (N->getOpcode() != ISD::SRL)
1625  return false;
1626 
1627  uint64_t AndMask = 0;
1628  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
1629  return false;
1630 
1631  Opd0 = N->getOperand(0).getOperand(0);
1632 
1633  uint64_t SrlImm = 0;
1634  if (!isIntImmediate(N->getOperand(1), SrlImm))
1635  return false;
1636 
1637  // Check whether we really have several bits extract here.
1638  unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
1639  if (BitWide && isMask_64(AndMask >> SrlImm)) {
1640  if (N->getValueType(0) == MVT::i32)
1641  Opc = AArch64::UBFMWri;
1642  else
1643  Opc = AArch64::UBFMXri;
1644 
1645  LSB = SrlImm;
1646  MSB = BitWide + SrlImm - 1;
1647  return true;
1648  }
1649 
1650  return false;
1651 }
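// A sketch (not part of this file) of the values the helper above derives for
// (srl (and x, 0xff0), 4): 0xff0 >> 4 is the low mask 0xff, eight contiguous
// bits, so the pair folds into one UBFM. Identifiers are illustrative.
namespace sketch_several_bits {
constexpr unsigned long long AndMask = 0xff0;
constexpr unsigned SrlImm = 4;
constexpr unsigned BitWide = 8;                  // trailing ones of AndMask >> SrlImm
constexpr unsigned LSB = SrlImm;
constexpr unsigned MSB = BitWide + SrlImm - 1;
static_assert((AndMask >> SrlImm) == 0xff && LSB == 4 && MSB == 11,
              "UBFM x, #4, #11, i.e. UBFX #4, #8");
} // namespace sketch_several_bits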
1652 
1653 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1654  unsigned &Immr, unsigned &Imms,
1655  bool BiggerPattern) {
1656  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1657  "N must be a SHR/SRA operation to call this function");
1658 
1659  EVT VT = N->getValueType(0);
1660 
1661  // We could test the type of VT and return false when the type does not
1662  // match, but since that check is done prior to this call in the current
1663  // context, we turned it into an assert to avoid redundant code.
1664  assert((VT == MVT::i32 || VT == MVT::i64) &&
1665  "Type checking must have been done before calling this function");
1666 
1667  // Check for AND + SRL doing several bits extract.
1668  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
1669  return true;
1670 
1671  // We're looking for a shift of a shift.
1672  uint64_t ShlImm = 0;
1673  uint64_t TruncBits = 0;
1674  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
1675  Opd0 = N->getOperand(0).getOperand(0);
1676  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1677  N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1678  // We are looking for a shift of truncate. Truncate from i64 to i32 could
1679  // be considered as setting high 32 bits as zero. Our strategy here is to
1680  // always generate 64bit UBFM. This consistency will help the CSE pass
1681  // later find more redundancy.
1682  Opd0 = N->getOperand(0).getOperand(0);
1683  TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1684  VT = Opd0.getValueType();
1685  assert(VT == MVT::i64 && "the promoted type should be i64");
1686  } else if (BiggerPattern) {
1687  // Let's pretend a 0 shift left has been performed.
1688  // FIXME: Currently we limit this to the bigger pattern case,
1689  // because some optimizations expect AND and not UBFM
1690  Opd0 = N->getOperand(0);
1691  } else
1692  return false;
1693 
1694  // Missing combines/constant folding may have left us with strange
1695  // constants.
1696  if (ShlImm >= VT.getSizeInBits()) {
1697  LLVM_DEBUG(
1698  (dbgs() << N
1699  << ": Found large shift immediate, this should not happen\n"));
1700  return false;
1701  }
1702 
1703  uint64_t SrlImm = 0;
1704  if (!isIntImmediate(N->getOperand(1), SrlImm))
1705  return false;
1706 
1707  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
1708  "bad amount in shift node!");
1709  int immr = SrlImm - ShlImm;
1710  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
1711  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
1712  // SRA requires a signed extraction
1713  if (VT == MVT::i32)
1714  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1715  else
1716  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1717  return true;
1718 }
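// A sketch (not part of this file) of the shift-of-shift case above, for
// (srl (shl x, 24), 28) on i32 with no truncate involved (TruncBits == 0).
namespace sketch_shr_of_shl {
constexpr unsigned Bits = 32, ShlImm = 24, SrlImm = 28, TruncBits = 0;
constexpr unsigned Immr = SrlImm - ShlImm;                // 4
constexpr unsigned Imms = Bits - ShlImm - TruncBits - 1;  // 7
static_assert(Immr == 4 && Imms == 7,
              "UBFMWri x, #4, #7 extracts bits [7:4], i.e. UBFX w0, w1, #4, #4");
} // namespace sketch_shr_of_shl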
1719 
1720 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
1721  assert(N->getOpcode() == ISD::SIGN_EXTEND);
1722 
1723  EVT VT = N->getValueType(0);
1724  EVT NarrowVT = N->getOperand(0)->getValueType(0);
1725  if (VT != MVT::i64 || NarrowVT != MVT::i32)
1726  return false;
1727 
1728  uint64_t ShiftImm;
1729  SDValue Op = N->getOperand(0);
1730  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1731  return false;
1732 
1733  SDLoc dl(N);
1734  // Extend the incoming operand of the shift to 64-bits.
1735  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
1736  unsigned Immr = ShiftImm;
1737  unsigned Imms = NarrowVT.getSizeInBits() - 1;
1738  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1739  CurDAG->getTargetConstant(Imms, dl, VT)};
1740  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
1741  return true;
1742 }
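// A sketch (not part of this file) of what the routine above produces for
// (sext_i64 (sra x_i32, 5)): the i32 operand is widened and one 64-bit SBFM
// performs both the arithmetic shift and the sign extension.
namespace sketch_sext_sra {
constexpr unsigned ShiftImm = 5;
constexpr unsigned Immr = ShiftImm;   // 5
constexpr unsigned Imms = 32 - 1;     // 31, the MSB of the narrow source
static_assert(Immr == 5 && Imms == 31, "SBFMXri widened_x, #5, #31");
} // namespace sketch_sext_sra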
1743 
1744 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
1745  SDValue &Opd0, unsigned &Immr, unsigned &Imms,
1746  unsigned NumberOfIgnoredLowBits = 0,
1747  bool BiggerPattern = false) {
1748  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
1749  return false;
1750 
1751  switch (N->getOpcode()) {
1752  default:
1753  if (!N->isMachineOpcode())
1754  return false;
1755  break;
1756  case ISD::AND:
1757  return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
1758  NumberOfIgnoredLowBits, BiggerPattern);
1759  case ISD::SRL:
1760  case ISD::SRA:
1761  return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
1762 
1763  case ISD::SIGN_EXTEND_INREG:
1764  return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
1765  }
1766 
1767  unsigned NOpc = N->getMachineOpcode();
1768  switch (NOpc) {
1769  default:
1770  return false;
1771  case AArch64::SBFMWri:
1772  case AArch64::UBFMWri:
1773  case AArch64::SBFMXri:
1774  case AArch64::UBFMXri:
1775  Opc = NOpc;
1776  Opd0 = N->getOperand(0);
1777  Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
1778  Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
1779  return true;
1780  }
1781  // Unreachable
1782  return false;
1783 }
1784 
1785 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
1786  unsigned Opc, Immr, Imms;
1787  SDValue Opd0;
1788  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
1789  return false;
1790 
1791  EVT VT = N->getValueType(0);
1792  SDLoc dl(N);
1793 
1794  // If the bit extract operation is 64bit but the original type is 32bit, we
1795  // need to add one EXTRACT_SUBREG.
1796  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1797  SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
1798  CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
1799 
1800  SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
1801  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1802  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
1803  MVT::i32, SDValue(BFM, 0), SubReg));
1804  return true;
1805  }
1806 
1807  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1808  CurDAG->getTargetConstant(Imms, dl, VT)};
1809  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1810  return true;
1811 }
1812 
1813 /// Does DstMask form a complementary pair with the mask provided by
1814 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
1815 /// this asks whether DstMask zeroes precisely those bits that will be set by
1816 /// the other half.
1817 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
1818  unsigned NumberOfIgnoredHighBits, EVT VT) {
1819  assert((VT == MVT::i32 || VT == MVT::i64) &&
1820  "i32 or i64 mask type expected!");
1821  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1822 
1823  APInt SignificantDstMask = APInt(BitWidth, DstMask);
1824  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1825 
1826  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1827  (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1828 }
1829 
1830 // Look for bits that will be useful for later uses.
1831 // A bit is considered useless as soon as it is dropped and never used
1832 // before it has been dropped.
1833 // E.g., looking for the useful bits of x:
1834 // 1. y = x & 0x7
1835 // 2. z = y >> 2
1836 // After #1, the useful bits of x are 0x7; those useful bits of x live through
1837 // y.
1838 // After #2, the useful bits of x are 0x4.
1839 // However, if x is used by an unpredictable instruction, then all its bits
1840 // are useful.
1841 // E.g.
1842 // 1. y = x & 0x7
1843 // 2. z = y >> 2
1844 // 3. str x, [@x]
1845 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
1846 
1847 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
1848  unsigned Depth) {
1849  uint64_t Imm =
1850  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1851  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
1852  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
1853  getUsefulBits(Op, UsefulBits, Depth + 1);
1854 }
1855 
1856 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
1857  uint64_t Imm, uint64_t MSB,
1858  unsigned Depth) {
1859  // inherit the bitwidth value
1860  APInt OpUsefulBits(UsefulBits);
1861  OpUsefulBits = 1;
1862 
1863  if (MSB >= Imm) {
1864  OpUsefulBits <<= MSB - Imm + 1;
1865  --OpUsefulBits;
1866  // The interesting part will be in the lower part of the result
1867  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1868  // The interesting part was starting at Imm in the argument
1869  OpUsefulBits <<= Imm;
1870  } else {
1871  OpUsefulBits <<= MSB + 1;
1872  --OpUsefulBits;
1873  // The interesting part will be shifted in the result
1874  OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
1875  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1876  // The interesting part was at zero in the argument
1877  OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
1878  }
1879 
1880  UsefulBits &= OpUsefulBits;
1881 }
1882 
1883 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
1884  unsigned Depth) {
1885  uint64_t Imm =
1886  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1887  uint64_t MSB =
1888  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1889 
1890  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1891 }
1892 
1893 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
1894  unsigned Depth) {
1895  uint64_t ShiftTypeAndValue =
1896  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1897  APInt Mask(UsefulBits);
1898  Mask.clearAllBits();
1899  Mask.flipAllBits();
1900 
1901  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
1902  // Shift Left
1903  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1904  Mask <<= ShiftAmt;
1905  getUsefulBits(Op, Mask, Depth + 1);
1906  Mask.lshrInPlace(ShiftAmt);
1907  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
1908  // Shift Right
1909  // We do not handle AArch64_AM::ASR, because the sign will change the
1910  // number of useful bits
1911  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1912  Mask.lshrInPlace(ShiftAmt);
1913  getUsefulBits(Op, Mask, Depth + 1);
1914  Mask <<= ShiftAmt;
1915  } else
1916  return;
1917 
1918  UsefulBits &= Mask;
1919 }
1920 
1921 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
1922  unsigned Depth) {
1923  uint64_t Imm =
1924  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1925  uint64_t MSB =
1926  cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
1927 
1928  APInt OpUsefulBits(UsefulBits);
1929  OpUsefulBits = 1;
1930 
1931  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
1932  ResultUsefulBits.flipAllBits();
1933  APInt Mask(UsefulBits.getBitWidth(), 0);
1934 
1935  getUsefulBits(Op, ResultUsefulBits, Depth + 1);
1936 
1937  if (MSB >= Imm) {
1938  // The instruction is a BFXIL.
1939  uint64_t Width = MSB - Imm + 1;
1940  uint64_t LSB = Imm;
1941 
1942  OpUsefulBits <<= Width;
1943  --OpUsefulBits;
1944 
1945  if (Op.getOperand(1) == Orig) {
1946  // Copy the low bits from the result to bits starting from LSB.
1947  Mask = ResultUsefulBits & OpUsefulBits;
1948  Mask <<= LSB;
1949  }
1950 
1951  if (Op.getOperand(0) == Orig)
1952  // Bits starting from LSB in the input contribute to the result.
1953  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1954  } else {
1955  // The instruction is a BFI.
1956  uint64_t Width = MSB + 1;
1957  uint64_t LSB = UsefulBits.getBitWidth() - Imm;
1958 
1959  OpUsefulBits <<= Width;
1960  --OpUsefulBits;
1961  OpUsefulBits <<= LSB;
1962 
1963  if (Op.getOperand(1) == Orig) {
1964  // Copy the bits from the result to the zero bits.
1965  Mask = ResultUsefulBits & OpUsefulBits;
1966  Mask.lshrInPlace(LSB);
1967  }
1968 
1969  if (Op.getOperand(0) == Orig)
1970  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1971  }
1972 
1973  UsefulBits &= Mask;
1974 }
1975 
1976 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
1977  SDValue Orig, unsigned Depth) {
1978 
1979  // Users of this node should have already been instruction selected
1980  // FIXME: Can we turn that into an assert?
1981  if (!UserNode->isMachineOpcode())
1982  return;
1983 
1984  switch (UserNode->getMachineOpcode()) {
1985  default:
1986  return;
1987  case AArch64::ANDSWri:
1988  case AArch64::ANDSXri:
1989  case AArch64::ANDWri:
1990  case AArch64::ANDXri:
1991  // We increment Depth only when we call the getUsefulBits
1992  return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
1993  Depth);
1994  case AArch64::UBFMWri:
1995  case AArch64::UBFMXri:
1996  return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
1997 
1998  case AArch64::ORRWrs:
1999  case AArch64::ORRXrs:
2000  if (UserNode->getOperand(1) != Orig)
2001  return;
2002  return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2003  Depth);
2004  case AArch64::BFMWri:
2005  case AArch64::BFMXri:
2006  return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2007 
2008  case AArch64::STRBBui:
2009  case AArch64::STURBBi:
2010  if (UserNode->getOperand(0) != Orig)
2011  return;
2012  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2013  return;
2014 
2015  case AArch64::STRHHui:
2016  case AArch64::STURHHi:
2017  if (UserNode->getOperand(0) != Orig)
2018  return;
2019  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2020  return;
2021  }
2022 }
2023 
2024 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2025  if (Depth >= 6)
2026  return;
2027  // Initialize UsefulBits
2028  if (!Depth) {
2029  unsigned Bitwidth = Op.getScalarValueSizeInBits();
2030  // At the beginning, assume every produced bit is useful
2031  UsefulBits = APInt(Bitwidth, 0);
2032  UsefulBits.flipAllBits();
2033  }
2034  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2035 
2036  for (SDNode *Node : Op.getNode()->uses()) {
2037  // A use cannot produce useful bits
2038  APInt UsefulBitsForUse = APInt(UsefulBits);
2039  getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2040  UsersUsefulBits |= UsefulBitsForUse;
2041  }
2042  // UsefulBits contains the produced bits that are meaningful for the
2043  // current definition, thus a user cannot make a bit meaningful at
2044  // this point
2045  UsefulBits &= UsersUsefulBits;
2046 }
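// A compile-time sketch (not part of this file) of the running example in the
// comment block above getUsefulBits: with y = x & 0x7 and z = y >> 2 as the
// only uses, only bit 2 of x can still influence a later value.
namespace sketch_useful_bits {
constexpr unsigned long long AfterAnd = 0x7;                  // useful bits of x after #1
constexpr unsigned long long AfterShift = AfterAnd & ~0x3ull; // y >> 2 drops bits 0 and 1
static_assert(AfterShift == 0x4, "only bit 2 of x remains useful");
} // namespace sketch_useful_bits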
2047 
2048 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
2049 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2050 /// 0, return Op unchanged.
2051 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2052  if (ShlAmount == 0)
2053  return Op;
2054 
2055  EVT VT = Op.getValueType();
2056  SDLoc dl(Op);
2057  unsigned BitWidth = VT.getSizeInBits();
2058  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2059 
2060  SDNode *ShiftNode;
2061  if (ShlAmount > 0) {
2062  // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2063  ShiftNode = CurDAG->getMachineNode(
2064  UBFMOpc, dl, VT, Op,
2065  CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2066  CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2067  } else {
2068  // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2069  assert(ShlAmount < 0 && "expected right shift");
2070  int ShrAmount = -ShlAmount;
2071  ShiftNode = CurDAG->getMachineNode(
2072  UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2073  CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2074  }
2075 
2076  return SDValue(ShiftNode, 0);
2077 }
2078 
2079 /// Does this tree qualify as an attempt to move a bitfield into position,
2080 /// essentially "(and (shl VAL, N), Mask)".
2081 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2082  bool BiggerPattern,
2083  SDValue &Src, int &ShiftAmount,
2084  int &MaskWidth) {
2085  EVT VT = Op.getValueType();
2086  unsigned BitWidth = VT.getSizeInBits();
2087  (void)BitWidth;
2088  assert(BitWidth == 32 || BitWidth == 64);
2089 
2090  KnownBits Known = CurDAG->computeKnownBits(Op);
2091 
2092  // Non-zero in the sense that they're not provably zero, which is the key
2093  // point if we want to use this value
2094  uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2095 
2096  // Discard a constant AND mask if present. It's safe because the node will
2097  // already have been factored into the computeKnownBits calculation above.
2098  uint64_t AndImm;
2099  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
2100  assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
2101  Op = Op.getOperand(0);
2102  }
2103 
2104  // Don't match if the SHL has more than one use, since then we'll end up
2105  // generating SHL+UBFIZ instead of just keeping SHL+AND.
2106  if (!BiggerPattern && !Op.hasOneUse())
2107  return false;
2108 
2109  uint64_t ShlImm;
2110  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2111  return false;
2112  Op = Op.getOperand(0);
2113 
2114  if (!isShiftedMask_64(NonZeroBits))
2115  return false;
2116 
2117  ShiftAmount = countTrailingZeros(NonZeroBits);
2118  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
2119 
2120  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2121  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2122  // amount. BiggerPattern is true when this pattern is being matched for BFI,
2123  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2124  // which case it is not profitable to insert an extra shift.
2125  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
2126  return false;
2127  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
2128 
2129  return true;
2130 }
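// A sketch (not part of this file) of how the helper above decomposes
// (and (shl x, 4), 0xff0) on i32 when nothing else is known about x: the
// provably non-zero bits form the shifted mask 0xff0.
namespace sketch_positioning {
constexpr unsigned long long NonZeroBits = 0xff0;
constexpr unsigned ShiftAmount = 4;   // countTrailingZeros(NonZeroBits)
constexpr unsigned MaskWidth = 8;     // countTrailingOnes(NonZeroBits >> ShiftAmount)
static_assert((NonZeroBits >> ShiftAmount) == 0xff && MaskWidth == 8,
              "ShlImm == ShiftAmount here, so Src stays x with no extra shift");
} // namespace sketch_positioning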
2131 
2132 static bool isShiftedMask(uint64_t Mask, EVT VT) {
2133  assert(VT == MVT::i32 || VT == MVT::i64);
2134  if (VT == MVT::i32)
2135  return isShiftedMask_32(Mask);
2136  return isShiftedMask_64(Mask);
2137 }
2138 
2139 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
2140 // inserted only sets known zero bits.
2141 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2142  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
2143 
2144  EVT VT = N->getValueType(0);
2145  if (VT != MVT::i32 && VT != MVT::i64)
2146  return false;
2147 
2148  unsigned BitWidth = VT.getSizeInBits();
2149 
2150  uint64_t OrImm;
2151  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
2152  return false;
2153 
2154  // Skip this transformation if the ORR immediate can be encoded in the ORR.
2155  // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
2156  // performance neutral.
2157  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2158  return false;
2159 
2160  uint64_t MaskImm;
2161  SDValue And = N->getOperand(0);
2162  // Must be a single use AND with an immediate operand.
2163  if (!And.hasOneUse() ||
2164  !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2165  return false;
2166 
2167  // Compute the Known Zero for the AND as this allows us to catch more general
2168  // cases than just looking for AND with imm.
2169  KnownBits Known = CurDAG->computeKnownBits(And);
2170 
2171  // Non-zero in the sense that they're not provably zero, which is the key
2172  // point if we want to use this value.
2173  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
2174 
2175  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
2176  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
2177  return false;
2178 
2179  // The bits being inserted must only set those bits that are known to be zero.
2180  if ((OrImm & NotKnownZero) != 0) {
2181  // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2182  // currently handle this case.
2183  return false;
2184  }
2185 
2186  // BFI/BFXIL dst, src, #lsb, #width.
2187  int LSB = countTrailingOnes(NotKnownZero);
2188  int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2189 
2190  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
2191  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2192  unsigned ImmS = Width - 1;
2193 
2194  // If we're creating a BFI instruction avoid cases where we need more
2195  // instructions to materialize the BFI constant as compared to the original
2196  // ORR. A BFXIL will use the same constant as the original ORR, so the code
2197  // should be no worse in this case.
2198  bool IsBFI = LSB != 0;
2199  uint64_t BFIImm = OrImm >> LSB;
2200  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2201  // We have a BFI instruction and we know the constant can't be materialized
2202  // with a ORR-immediate with the zero register.
2203  unsigned OrChunks = 0, BFIChunks = 0;
2204  for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2205  if (((OrImm >> Shift) & 0xFFFF) != 0)
2206  ++OrChunks;
2207  if (((BFIImm >> Shift) & 0xFFFF) != 0)
2208  ++BFIChunks;
2209  }
2210  if (BFIChunks > OrChunks)
2211  return false;
2212  }
2213 
2214  // Materialize the constant to be inserted.
2215  SDLoc DL(N);
2216  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2217  SDNode *MOVI = CurDAG->getMachineNode(
2218  MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2219 
2220  // Create the BFI/BFXIL instruction.
2221  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2222  CurDAG->getTargetConstant(ImmR, DL, VT),
2223  CurDAG->getTargetConstant(ImmS, DL, VT)};
2224  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2225  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2226  return true;
2227 }
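// A sketch (not part of this file) of the BFI the routine above forms for
// (or (and x, 0xFFFFFF0F), 0x50) on i32, assuming nothing more is known about
// x: bits [7:4] are provably zero and 0x50 writes only into that hole.
namespace sketch_or_imm_bfi {
constexpr unsigned BitWidth = 32, LSB = 4, Width = 4;
constexpr unsigned ImmR = (BitWidth - LSB) % BitWidth;   // 28
constexpr unsigned ImmS = Width - 1;                     // 3
constexpr unsigned BFIImm = 0x50 >> LSB;                 // 5, materialized by MOVi32imm
static_assert(ImmR == 28 && ImmS == 3 && BFIImm == 5,
              "BFMWri dst, w_tmp, #28, #3 is the alias BFI dst, w_tmp, #4, #4");
} // namespace sketch_or_imm_bfi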
2228 
2229 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
2230  SelectionDAG *CurDAG) {
2231  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
2232 
2233  EVT VT = N->getValueType(0);
2234  if (VT != MVT::i32 && VT != MVT::i64)
2235  return false;
2236 
2237  unsigned BitWidth = VT.getSizeInBits();
2238 
2239  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
2240  // have the expected shape. Try to undo that.
2241 
2242  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
2243  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2244 
2245  // Given an OR operation, check if we have the following pattern
2246  // ubfm c, b, imm, imm2 (or something that does the same jobs, see
2247  // isBitfieldExtractOp)
2248  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
2249  // countTrailingZeros(mask2) == imm2 - imm + 1
2250  // f = d | c
2251  // if yes, replace the OR instruction with:
2252  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
2253 
2254  // OR is commutative, check all combinations of operand order and values of
2255  // BiggerPattern, i.e.
2256  // Opd0, Opd1, BiggerPattern=false
2257  // Opd1, Opd0, BiggerPattern=false
2258  // Opd0, Opd1, BiggerPattern=true
2259  // Opd1, Opd0, BiggerPattern=true
2260  // Several of these combinations may match, so check with BiggerPattern=false
2261  // first since that will produce better results by matching more instructions
2262  // and/or inserting fewer extra instructions.
2263  for (int I = 0; I < 4; ++I) {
2264 
2265  SDValue Dst, Src;
2266  unsigned ImmR, ImmS;
2267  bool BiggerPattern = I / 2;
2268  SDValue OrOpd0Val = N->getOperand(I % 2);
2269  SDNode *OrOpd0 = OrOpd0Val.getNode();
2270  SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
2271  SDNode *OrOpd1 = OrOpd1Val.getNode();
2272 
2273  unsigned BFXOpc;
2274  int DstLSB, Width;
2275  if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
2276  NumberOfIgnoredLowBits, BiggerPattern)) {
2277  // Check that the returned opcode is compatible with the pattern,
2278  // i.e., same type and zero extended (U and not S)
2279  if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
2280  (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
2281  continue;
2282 
2283  // Compute the width of the bitfield insertion
2284  DstLSB = 0;
2285  Width = ImmS - ImmR + 1;
2286  // FIXME: This constraint is to catch bitfield insertion; we may
2287  // want to widen the pattern if we want to grab the general bitfield
2288  // move case
2289  if (Width <= 0)
2290  continue;
2291 
2292  // If the mask on the insertee is correct, we have a BFXIL operation. We
2293  // can share the ImmR and ImmS values from the already-computed UBFM.
2294  } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
2295  BiggerPattern,
2296  Src, DstLSB, Width)) {
2297  ImmR = (BitWidth - DstLSB) % BitWidth;
2298  ImmS = Width - 1;
2299  } else
2300  continue;
2301 
2302  // Check the second part of the pattern
2303  EVT VT = OrOpd1Val.getValueType();
2304  assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
2305 
2306  // Compute the Known Zero for the candidate of the first operand.
2307  // This allows to catch more general case than just looking for
2308  // AND with imm. Indeed, simplify-demanded-bits may have removed
2309  // the AND instruction because it proves it was useless.
2310  KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
2311 
2312  // Check if there is enough room for the second operand to appear
2313  // in the first one
2314  APInt BitsToBeInserted =
2315  APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
2316 
2317  if ((BitsToBeInserted & ~Known.Zero) != 0)
2318  continue;
2319 
2320  // Set the first operand
2321  uint64_t Imm;
2322  if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
2323  isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
2324  // In that case, we can eliminate the AND
2325  Dst = OrOpd1->getOperand(0);
2326  else
2327  // Maybe the AND has been removed by simplify-demanded-bits
2328  // or is useful because it discards more bits
2329  Dst = OrOpd1Val;
2330 
2331  // both parts match
2332  SDLoc DL(N);
2333  SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
2334  CurDAG->getTargetConstant(ImmS, DL, VT)};
2335  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2336  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2337  return true;
2338  }
2339 
2340  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
2341  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
2342  // mask (e.g., 0x000ffff0).
2343  uint64_t Mask0Imm, Mask1Imm;
2344  SDValue And0 = N->getOperand(0);
2345  SDValue And1 = N->getOperand(1);
2346  if (And0.hasOneUse() && And1.hasOneUse() &&
2347  isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
2348  isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
2349  APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
2350  (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
2351 
2352  // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
2353  // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
2354  // bits to be inserted.
2355  if (isShiftedMask(Mask0Imm, VT)) {
2356  std::swap(And0, And1);
2357  std::swap(Mask0Imm, Mask1Imm);
2358  }
2359 
2360  SDValue Src = And1->getOperand(0);
2361  SDValue Dst = And0->getOperand(0);
2362  unsigned LSB = countTrailingZeros(Mask1Imm);
2363  int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
2364 
2365  // The BFXIL inserts the low-order bits from a source register, so right
2366  // shift the needed bits into place.
2367  SDLoc DL(N);
2368  unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2369  SDNode *LSR = CurDAG->getMachineNode(
2370  ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
2371  CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
2372 
2373  // BFXIL is an alias of BFM, so translate to BFM operands.
2374  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2375  unsigned ImmS = Width - 1;
2376 
2377  // Create the BFXIL instruction.
2378  SDValue Ops[] = {Dst, SDValue(LSR, 0),
2379  CurDAG->getTargetConstant(ImmR, DL, VT),
2380  CurDAG->getTargetConstant(ImmS, DL, VT)};
2381  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2382  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2383  return true;
2384  }
2385 
2386  return false;
2387 }
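// A sketch (not part of this file) of the first pattern matched above, for
// f = (e & 0xffffff00) | ((b >> 8) & 0xff) on i32: one operand is already a
// bitfield extract of b (ImmR = 8, ImmS = 15), the mask on e clears exactly
// the eight bits being inserted, so the OR folds to a single BFM.
namespace sketch_or_bfxil {
constexpr unsigned ImmR = 8, ImmS = 15;
constexpr unsigned Width = ImmS - ImmR + 1;   // 8 bits inserted at DstLSB == 0
static_assert(Width == 8, "BFMWri e, b, #8, #15 is the alias BFXIL e, b, #8, #8");
} // namespace sketch_or_bfxil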
2388 
2389 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
2390  if (N->getOpcode() != ISD::OR)
2391  return false;
2392 
2393  APInt NUsefulBits;
2394  getUsefulBits(SDValue(N, 0), NUsefulBits);
2395 
2396  // If no bits are useful, just return UNDEF.
2397  if (!NUsefulBits) {
2398  CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
2399  return true;
2400  }
2401 
2402  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
2403  return true;
2404 
2405  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
2406 }
2407 
2408 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
2409 /// equivalent of a left shift by a constant amount followed by an and masking
2410 /// out a contiguous set of bits.
2411 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
2412  if (N->getOpcode() != ISD::AND)
2413  return false;
2414 
2415  EVT VT = N->getValueType(0);
2416  if (VT != MVT::i32 && VT != MVT::i64)
2417  return false;
2418 
2419  SDValue Op0;
2420  int DstLSB, Width;
2421  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
2422  Op0, DstLSB, Width))
2423  return false;
2424 
2425  // ImmR is the rotate right amount.
2426  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
2427  // ImmS is the most significant bit of the source to be moved.
2428  unsigned ImmS = Width - 1;
2429 
2430  SDLoc DL(N);
2431  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
2432  CurDAG->getTargetConstant(ImmS, DL, VT)};
2433  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2434  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2435  return true;
2436 }
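// A sketch (not part of this file) of the UBFIZ encoding produced above for
// (and (shl x, 4), 0xff0) on i32, i.e. DstLSB == 4 and Width == 8.
namespace sketch_ubfiz {
constexpr unsigned Bits = 32, DstLSB = 4, Width = 8;
constexpr unsigned ImmR = (Bits - DstLSB) % Bits;  // 28, the rotate-right amount
constexpr unsigned ImmS = Width - 1;               // 7
static_assert(ImmR == 28 && ImmS == 7,
              "UBFMWri x, #28, #7 is the alias UBFIZ w0, w1, #4, #8");
} // namespace sketch_ubfiz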
2437 
2438 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
2439 /// variable shift/rotate instructions.
2440 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
2441  EVT VT = N->getValueType(0);
2442 
2443  unsigned Opc;
2444  switch (N->getOpcode()) {
2445  case ISD::ROTR:
2446  Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
2447  break;
2448  case ISD::SHL:
2449  Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
2450  break;
2451  case ISD::SRL:
2452  Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
2453  break;
2454  case ISD::SRA:
2455  Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
2456  break;
2457  default:
2458  return false;
2459  }
2460 
2461  uint64_t Size;
2462  uint64_t Bits;
2463  if (VT == MVT::i32) {
2464  Bits = 5;
2465  Size = 32;
2466  } else if (VT == MVT::i64) {
2467  Bits = 6;
2468  Size = 64;
2469  } else
2470  return false;
2471 
2472  SDValue ShiftAmt = N->getOperand(1);
2473  SDLoc DL(N);
2474  SDValue NewShiftAmt;
2475 
2476  // Skip over an extend of the shift amount.
2477  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
2478  ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
2479  ShiftAmt = ShiftAmt->getOperand(0);
2480 
2481  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
2482  SDValue Add0 = ShiftAmt->getOperand(0);
2483  SDValue Add1 = ShiftAmt->getOperand(1);
2484  uint64_t Add0Imm;
2485  uint64_t Add1Imm;
2486  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
2487  // to avoid the ADD/SUB.
2488  if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
2489  NewShiftAmt = Add0;
2490  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2491  // generate a NEG instead of a SUB of a constant.
2492  else if (ShiftAmt->getOpcode() == ISD::SUB &&
2493  isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
2494  (Add0Imm % Size == 0)) {
2495  unsigned NegOpc;
2496  unsigned ZeroReg;
2497  EVT SubVT = ShiftAmt->getValueType(0);
2498  if (SubVT == MVT::i32) {
2499  NegOpc = AArch64::SUBWrr;
2500  ZeroReg = AArch64::WZR;
2501  } else {
2502  assert(SubVT == MVT::i64);
2503  NegOpc = AArch64::SUBXrr;
2504  ZeroReg = AArch64::XZR;
2505  }
2506  SDValue Zero =
2507  CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
2508  MachineSDNode *Neg =
2509  CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
2510  NewShiftAmt = SDValue(Neg, 0);
2511  } else
2512  return false;
2513  } else {
2514  // If the shift amount is masked with an AND, check that the mask covers the
2515  // bits that are implicitly ANDed off by the above opcodes and if so, skip
2516  // the AND.
2517  uint64_t MaskImm;
2518  if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
2519  return false;
2520 
2521  if (countTrailingOnes(MaskImm) < Bits)
2522  return false;
2523 
2524  NewShiftAmt = ShiftAmt->getOperand(0);
2525  }
2526 
2527  // Narrow/widen the shift amount to match the size of the shift operation.
2528  if (VT == MVT::i32)
2529  NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
2530  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
2531  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
2532  MachineSDNode *Ext = CurDAG->getMachineNode(
2533  AArch64::SUBREG_TO_REG, DL, VT,
2534  CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
2535  NewShiftAmt = SDValue(Ext, 0);
2536  }
2537 
2538  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
2539  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2540  return true;
2541 }
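// A sketch (not part of this file) of the mod-Size property the routine above
// relies on: the variable shift/rotate instructions only read the low
// log2(Size) bits of the amount, so masks and +/- adjustments that are
// 0 mod Size can be dropped.
constexpr unsigned long long lslv_like(unsigned long long x, unsigned long long amt) {
  return x << (amt & 63);   // models 64-bit LSLV: only amt % 64 matters
}
static_assert(lslv_like(1, 64 + 3) == lslv_like(1, 3),
              "adding a multiple of 64 to the amount changes nothing");
static_assert(lslv_like(1, 3 & 0xff) == lslv_like(1, 3),
              "an AND whose mask covers the low 6 bits can be skipped");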
2542 
2543 bool
2544 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2545  unsigned RegWidth) {
2546  APFloat FVal(0.0);
2547  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2548  FVal = CN->getValueAPF();
2549  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2550  // Some otherwise illegal constants are allowed in this case.
2551  if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2552  !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2553  return false;
2554 
2555  ConstantPoolSDNode *CN =
2556  dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2557  FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2558  } else
2559  return false;
2560 
2561  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2562  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2563  // x-register.
2564  //
2565  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2566  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2567  // integers.
2568  bool IsExact;
2569 
2570  // fbits is between 1 and 64 in the worst-case, which means the fmul
2571  // could have 2^64 as an actual operand. Need 65 bits of precision.
2572  APSInt IntVal(65, true);
2573  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2574 
2575  // N.b. isPowerOf2 also checks for > 0.
2576  if (!IsExact || !IntVal.isPowerOf2()) return false;
2577  unsigned FBits = IntVal.logBase2();
2578 
2579  // Checks above should have guaranteed that we haven't lost information in
2580  // finding FBits, but it must still be in range.
2581  if (FBits == 0 || FBits > RegWidth) return false;
2582 
2583  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2584  return true;
2585 }
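// A sketch (not part of this file) of the fixed-point check above: for
// (fp_to_sint (fmul Val, 16.0)) the multiplier is exactly 2^4, so FixedPos
// becomes 4 and the pair can become a single fcvtzs with #4 fractional bits.
namespace sketch_fcvt_fbits {
constexpr double Multiplier = 16.0;
constexpr unsigned FBits = 4;   // log2(16)
static_assert(Multiplier == (1u << FBits),
              "the multiplier must be an exact power of two");
} // namespace sketch_fcvt_fbits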
2586 
2587 // Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
2588 // of the string, obtains the integer values from them, and combines these
2589 // into a single value to be used in the MRS/MSR instruction.
2590 static int getIntOperandFromRegisterString(StringRef RegString) {
2591  SmallVector<StringRef, 5> Fields;
2592  RegString.split(Fields, ':');
2593 
2594  if (Fields.size() == 1)
2595  return -1;
2596 
2597  assert(Fields.size() == 5
2598  && "Invalid number of fields in read register string");
2599 
2600  SmallVector<int, 5> Ops;
2601  bool AllIntFields = true;
2602 
2603  for (StringRef Field : Fields) {
2604  unsigned IntField;
2605  AllIntFields &= !Field.getAsInteger(10, IntField);
2606  Ops.push_back(IntField);
2607  }
2608 
2609  assert(AllIntFields &&
2610  "Unexpected non-integer value in special register string.");
2611 
2612  // Need to combine the integer fields of the string into a single value
2613  // based on the bit encoding of MRS/MSR instruction.
2614  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2615  (Ops[3] << 3) | (Ops[4]);
2616 }
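// A sketch (not part of this file) of the field packing above for the string
// "3:3:13:0:2", the o0:op1:CRn:CRm:op2 spelling of TPIDR_EL0: each field is
// shifted into the 16-bit system register encoding used by MRS/MSR.
namespace sketch_sysreg_pack {
constexpr int Packed = (3 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2;
static_assert(Packed == 0xDE82, "matches the TPIDR_EL0 (S3_3_C13_C0_2) encoding");
} // namespace sketch_sysreg_pack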
2617 
2618 // Lower the read_register intrinsic to an MRS instruction node if the special
2619 // register string argument is either of the form detailed in the ALCE (the
2620 // form described in getIntOperandFromRegisterString) or is a named register
2621 // known by the MRS SysReg mapper.
2622 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2623  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2624  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2625  SDLoc DL(N);
2626 
2627  int Reg = getIntOperandFromRegisterString(RegString->getString());
2628  if (Reg != -1) {
2629  ReplaceNode(N, CurDAG->getMachineNode(
2630  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2631  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2632  N->getOperand(0)));
2633  return true;
2634  }
2635 
2636  // Use the sysreg mapper to map the remaining possible strings to the
2637  // value for the register to be used for the instruction operand.
2638  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2639  if (TheReg && TheReg->Readable &&
2640  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2641  Reg = TheReg->Encoding;
2642  else
2643  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2644 
2645  if (Reg != -1) {
2646  ReplaceNode(N, CurDAG->getMachineNode(
2647  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2648  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2649  N->getOperand(0)));
2650  return true;
2651  }
2652 
2653  return false;
2654 }
2655 
2656 // Lower the write_register intrinsic to an MSR instruction node if the special
2657 // register string argument is either of the form detailed in the ALCE (the
2658 // form described in getIntOperandFromRegisterString) or is a named register
2659 // known by the MSR SysReg mapper.
2660 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
2661  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2662  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2663  SDLoc DL(N);
2664 
2665  int Reg = getIntOperandFromRegisterString(RegString->getString());
2666  if (Reg != -1) {
2667  ReplaceNode(
2668  N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
2669  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2670  N->getOperand(2), N->getOperand(0)));
2671  return true;
2672  }
2673 
2674  // Check if the register was one of those allowed as the pstatefield value in
2675  // the MSR (immediate) instruction. To accept the values allowed in the
2676  // pstatefield for the MSR (immediate) instruction, we also require that an
2677  // immediate value has been provided as an argument, we know that this is
2678  // the case as it has been ensured by semantic checking.
2679  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
2680  if (PMapper) {
2681  assert (isa<ConstantSDNode>(N->getOperand(2))
2682  && "Expected a constant integer expression.");
2683  unsigned Reg = PMapper->Encoding;
2684  uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
2685  unsigned State;
2686  if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) {
2687  assert(Immed < 2 && "Bad imm");
2688  State = AArch64::MSRpstateImm1;
2689  } else {
2690  assert(Immed < 16 && "Bad imm");
2691  State = AArch64::MSRpstateImm4;
2692  }
2693  ReplaceNode(N, CurDAG->getMachineNode(
2694  State, DL, MVT::Other,
2695  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2696  CurDAG->getTargetConstant(Immed, DL, MVT::i16),
2697  N->getOperand(0)));
2698  return true;
2699  }
2700 
2701  // Use the sysreg mapper to attempt to map the remaining possible strings
2702  // to the value for the register to be used for the MSR (register)
2703  // instruction operand.
2704  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2705  if (TheReg && TheReg->Writeable &&
2706  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2707  Reg = TheReg->Encoding;
2708  else
2709  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2710  if (Reg != -1) {
2711  ReplaceNode(N, CurDAG->getMachineNode(
2712  AArch64::MSR, DL, MVT::Other,
2713  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2714  N->getOperand(2), N->getOperand(0)));
2715  return true;
2716  }
2717 
2718  return false;
2719 }
2720 
2721 /// We've got special pseudo-instructions for these
2722 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2723  unsigned Opcode;
2724  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2725 
2726  // Leave IR for LSE if subtarget supports it.
2727  if (Subtarget->hasLSE()) return false;
2728 
2729  if (MemTy == MVT::i8)
2730  Opcode = AArch64::CMP_SWAP_8;
2731  else if (MemTy == MVT::i16)
2732  Opcode = AArch64::CMP_SWAP_16;
2733  else if (MemTy == MVT::i32)
2734  Opcode = AArch64::CMP_SWAP_32;
2735  else if (MemTy == MVT::i64)
2736  Opcode = AArch64::CMP_SWAP_64;
2737  else
2738  llvm_unreachable("Unknown AtomicCmpSwap type");
2739 
2740  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
2741  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2742  N->getOperand(0)};
2743  SDNode *CmpSwap = CurDAG->getMachineNode(
2744  Opcode, SDLoc(N),
2745  CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
2746 
2747  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2748  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2749 
2750  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2751  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2752  CurDAG->RemoveDeadNode(N);
2753 
2754  return true;
2755 }
2756 
2757 void AArch64DAGToDAGISel::Select(SDNode *Node) {
2758  // If we have a custom node, we already have selected!
2759  if (Node->isMachineOpcode()) {
2760  LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
2761  Node->setNodeId(-1);
2762  return;
2763  }
2764 
2765  // A few custom selection cases.
2766  EVT VT = Node->getValueType(0);
2767 
2768  switch (Node->getOpcode()) {
2769  default:
2770  break;
2771 
2772  case ISD::ATOMIC_CMP_SWAP:
2773  if (SelectCMP_SWAP(Node))
2774  return;
2775  break;
2776 
2777  case ISD::READ_REGISTER:
2778  if (tryReadRegister(Node))
2779  return;
2780  break;
2781 
2782  case ISD::WRITE_REGISTER:
2783  if (tryWriteRegister(Node))
2784  return;
2785  break;
2786 
2787  case ISD::ADD:
2788  if (tryMLAV64LaneV128(Node))
2789  return;
2790  break;
2791 
2792  case ISD::LOAD: {
2793  // Try to select as an indexed load. Fall through to normal processing
2794  // if we can't.
2795  if (tryIndexedLoad(Node))
2796  return;
2797  break;
2798  }
2799 
2800  case ISD::SRL:
2801  case ISD::AND:
2802  case ISD::SRA:
2803  case ISD::SIGN_EXTEND_INREG:
2804  if (tryBitfieldExtractOp(Node))
2805  return;
2806  if (tryBitfieldInsertInZeroOp(Node))
2807  return;
2808  LLVM_FALLTHROUGH;
2809  case ISD::ROTR:
2810  case ISD::SHL:
2811  if (tryShiftAmountMod(Node))
2812  return;
2813  break;
2814 
2815  case ISD::SIGN_EXTEND:
2816  if (tryBitfieldExtractOpFromSExt(Node))
2817  return;
2818  break;
2819 
2820  case ISD::OR:
2821  if (tryBitfieldInsertOp(Node))
2822  return;
2823  break;
2824 
2825  case ISD::EXTRACT_VECTOR_ELT: {
2826  // Extracting lane zero is a special case where we can just use a plain
2827  // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
2828  // the rest of the compiler, especially the register allocator and copy
2829  // propagation, to reason about, so is preferred when it's possible to
2830  // use it.
2831  ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
2832  // Bail and use the default Select() for non-zero lanes.
2833  if (LaneNode->getZExtValue() != 0)
2834  break;
2835  // If the element type is not the same as the result type, likewise
2836  // bail and use the default Select(), as there's more to do than just
2837  // a cross-class COPY. This catches extracts of i8 and i16 elements
2838  // since they will need an explicit zext.
2839  if (VT != Node->getOperand(0).getValueType().getVectorElementType())
2840  break;
2841  unsigned SubReg;
2842  switch (Node->getOperand(0)
2843  .getValueType()
2844  .getVectorElementType()
2845  .getSizeInBits()) {
2846  default:
2847  llvm_unreachable("Unexpected vector element type!");
2848  case 64:
2849  SubReg = AArch64::dsub;
2850  break;
2851  case 32:
2852  SubReg = AArch64::ssub;
2853  break;
2854  case 16:
2855  SubReg = AArch64::hsub;
2856  break;
2857  case 8:
2858  llvm_unreachable("unexpected zext-requiring extract element!");
2859  }
2860  SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
2861  Node->getOperand(0));
2862  LLVM_DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
2863  LLVM_DEBUG(Extract->dumpr(CurDAG));
2864  LLVM_DEBUG(dbgs() << "\n");
2865  ReplaceNode(Node, Extract.getNode());
2866  return;
2867  }
2868  case ISD::Constant: {
2869  // Materialize zero constants as copies from WZR/XZR. This allows
2870  // the coalescer to propagate these into other instructions.
2871  ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
2872  if (ConstNode->isNullValue()) {
2873  if (VT == MVT::i32) {
2874  SDValue New = CurDAG->getCopyFromReg(
2875  CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
2876  ReplaceNode(Node, New.getNode());
2877  return;
2878  } else if (VT == MVT::i64) {
2879  SDValue New = CurDAG->getCopyFromReg(
2880  CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
2881  ReplaceNode(Node, New.getNode());
2882  return;
2883  }
2884  }
2885  break;
2886  }
2887 
2888  case ISD::FrameIndex: {
2889  // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
2890  int FI = cast<FrameIndexSDNode>(Node)->getIndex();
2891  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
2892  const TargetLowering *TLI = getTargetLowering();
2893  SDValue TFI = CurDAG->getTargetFrameIndex(
2894  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2895  SDLoc DL(Node);
2896  SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
2897  CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
2898  CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
2899  return;
2900  }
2901  case ISD::INTRINSIC_W_CHAIN: {
2902  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2903  switch (IntNo) {
2904  default:
2905  break;
2906  case Intrinsic::aarch64_ldaxp:
2907  case Intrinsic::aarch64_ldxp: {
2908  unsigned Op =
2909  IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
2910  SDValue MemAddr = Node->getOperand(2);
2911  SDLoc DL(Node);
2912  SDValue Chain = Node->getOperand(0);
2913 
2914  SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
2915  MVT::Other, MemAddr, Chain);
2916 
2917  // Transfer memoperands.
2918  MachineMemOperand *MemOp =
2919  cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2920  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
2921  ReplaceNode(Node, Ld);
2922  return;
2923  }
2924  case Intrinsic::aarch64_stlxp:
2925  case Intrinsic::aarch64_stxp: {
2926  unsigned Op =
2927  IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
2928  SDLoc DL(Node);
2929  SDValue Chain = Node->getOperand(0);
2930  SDValue ValLo = Node->getOperand(2);
2931  SDValue ValHi = Node->getOperand(3);
2932  SDValue MemAddr = Node->getOperand(4);
2933 
2934  // Place arguments in the right order.
2935  SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
2936 
2937  SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
2938  // Transfer memoperands.
2939  MachineMemOperand *MemOp =
2940  cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2941  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2942 
2943  ReplaceNode(Node, St);
2944  return;
2945  }
2946  case Intrinsic::aarch64_neon_ld1x2:
2947  if (VT == MVT::v8i8) {
2948  SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
2949  return;
2950  } else if (VT == MVT::v16i8) {
2951  SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
2952  return;
2953  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2954  SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
2955  return;
2956  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2957  SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
2958  return;
2959  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2960  SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
2961  return;
2962  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2963  SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
2964  return;
2965  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2966  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2967  return;
2968  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2969  SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
2970  return;
2971  }
2972  break;
2973  case Intrinsic::aarch64_neon_ld1x3:
2974  if (VT == MVT::v8i8) {
2975  SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
2976  return;
2977  } else if (VT == MVT::v16i8) {
2978  SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
2979  return;
2980  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2981  SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
2982  return;
2983  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2984  SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
2985  return;
2986  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2987  SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
2988  return;
2989  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2990  SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
2991  return;
2992  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2993  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2994  return;
2995  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2996  SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
2997  return;
2998  }
2999  break;
3000  case Intrinsic::aarch64_neon_ld1x4:
3001  if (VT == MVT::v8i8) {
3002  SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
3003  return;
3004  } else if (VT == MVT::v16i8) {
3005  SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
3006  return;
3007  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3008  SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
3009  return;
3010  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3011  SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
3012  return;
3013  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3014  SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
3015  return;
3016  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3017  SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
3018  return;
3019  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3020  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3021  return;
3022  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3023  SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
3024  return;
3025  }
3026  break;
3027  case Intrinsic::aarch64_neon_ld2:
3028  if (VT == MVT::v8i8) {
3029  SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
3030  return;
3031  } else if (VT == MVT::v16i8) {
3032  SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
3033  return;
3034  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3035  SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
3036  return;
3037  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3038  SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
3039  return;
3040  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3041  SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
3042  return;
3043  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3044  SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
3045  return;
3046  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3047  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3048  return;
3049  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3050  SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
3051  return;
3052  }
3053  break;
3054  case Intrinsic::aarch64_neon_ld3:
3055  if (VT == MVT::v8i8) {
3056  SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
3057  return;
3058  } else if (VT == MVT::v16i8) {
3059  SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
3060  return;
3061  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3062  SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
3063  return;
3064  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3065  SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
3066  return;
3067  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3068  SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
3069  return;
3070  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3071  SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
3072  return;
3073  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3074  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3075  return;
3076  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3077  SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
3078  return;
3079  }
3080  break;
3081  case Intrinsic::aarch64_neon_ld4:
3082  if (VT == MVT::v8i8) {
3083  SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
3084  return;
3085  } else if (VT == MVT::v16i8) {
3086  SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
3087  return;
3088  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3089  SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
3090  return;
3091  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3092  SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
3093  return;
3094  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3095  SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
3096  return;
3097  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3098  SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
3099  return;
3100  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3101  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3102  return;
3103  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3104  SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
3105  return;
3106  }
3107  break;
3108  case Intrinsic::aarch64_neon_ld2r:
3109  if (VT == MVT::v8i8) {
3110  SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
3111  return;
3112  } else if (VT == MVT::v16i8) {
3113  SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
3114  return;
3115  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3116  SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
3117  return;
3118  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3119  SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
3120  return;
3121  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3122  SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
3123  return;
3124  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3125  SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
3126  return;
3127  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3128  SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
3129  return;
3130  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3131  SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
3132  return;
3133  }
3134  break;
3135  case Intrinsic::aarch64_neon_ld3r:
3136  if (VT == MVT::v8i8) {
3137  SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
3138  return;
3139  } else if (VT == MVT::v16i8) {
3140  SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
3141  return;
3142  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3143  SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
3144  return;
3145  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3146  SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
3147  return;
3148  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3149  SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
3150  return;
3151  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3152  SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
3153  return;
3154  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3155  SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
3156  return;
3157  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3158  SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3159  return;
3160  }
3161  break;
3162  case Intrinsic::aarch64_neon_ld4r:
3163  if (VT == MVT::v8i8) {
3164  SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3165  return;
3166  } else if (VT == MVT::v16i8) {
3167  SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3168  return;
3169  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3170  SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3171  return;
3172  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3173  SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3174  return;
3175  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3176  SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3177  return;
3178  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3179  SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3180  return;
3181  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3182  SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3183  return;
3184  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3185  SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3186  return;
3187  }
3188  break;
3189  case Intrinsic::aarch64_neon_ld2lane:
3190  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3191  SelectLoadLane(Node, 2, AArch64::LD2i8);
3192  return;
3193  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3194  VT == MVT::v8f16) {
3195  SelectLoadLane(Node, 2, AArch64::LD2i16);
3196  return;
3197  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3198  VT == MVT::v2f32) {
3199  SelectLoadLane(Node, 2, AArch64::LD2i32);
3200  return;
3201  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3202  VT == MVT::v1f64) {
3203  SelectLoadLane(Node, 2, AArch64::LD2i64);
3204  return;
3205  }
3206  break;
3207  case Intrinsic::aarch64_neon_ld3lane:
3208  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3209  SelectLoadLane(Node, 3, AArch64::LD3i8);
3210  return;
3211  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3212  VT == MVT::v8f16) {
3213  SelectLoadLane(Node, 3, AArch64::LD3i16);
3214  return;
3215  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3216  VT == MVT::v2f32) {
3217  SelectLoadLane(Node, 3, AArch64::LD3i32);
3218  return;
3219  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3220  VT == MVT::v1f64) {
3221  SelectLoadLane(Node, 3, AArch64::LD3i64);
3222  return;
3223  }
3224  break;
3225  case Intrinsic::aarch64_neon_ld4lane:
3226  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3227  SelectLoadLane(Node, 4, AArch64::LD4i8);
3228  return;
3229  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3230  VT == MVT::v8f16) {
3231  SelectLoadLane(Node, 4, AArch64::LD4i16);
3232  return;
3233  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3234  VT == MVT::v2f32) {
3235  SelectLoadLane(Node, 4, AArch64::LD4i32);
3236  return;
3237  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3238  VT == MVT::v1f64) {
3239  SelectLoadLane(Node, 4, AArch64::LD4i64);
3240  return;
3241  }
3242  break;
3243  }
3244  } break;
3245  case ISD::INTRINSIC_WO_CHAIN: {
3246  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
3247  switch (IntNo) {
3248  default:
3249  break;
3250  case Intrinsic::aarch64_neon_tbl2:
3251  SelectTable(Node, 2,
3252  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
3253  false);
3254  return;
3255  case Intrinsic::aarch64_neon_tbl3:
3256  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
3257  : AArch64::TBLv16i8Three,
3258  false);
3259  return;
3260  case Intrinsic::aarch64_neon_tbl4:
3261  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
3262  : AArch64::TBLv16i8Four,
3263  false);
3264  return;
3265  case Intrinsic::aarch64_neon_tbx2:
3266  SelectTable(Node, 2,
3267  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
3268  true);
3269  return;
3270  case Intrinsic::aarch64_neon_tbx3:
3271  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
3272  : AArch64::TBXv16i8Three,
3273  true);
3274  return;
3275  case Intrinsic::aarch64_neon_tbx4:
3276  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
3277  : AArch64::TBXv16i8Four,
3278  true);
3279  return;
3280  case Intrinsic::aarch64_neon_smull:
3281  case Intrinsic::aarch64_neon_umull:
3282  if (tryMULLV64LaneV128(IntNo, Node))
3283  return;
3284  break;
3285  }
3286  break;
3287  }
3288  case ISD::INTRINSIC_VOID: {
3289  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3290  if (Node->getNumOperands() >= 3)
3291  VT = Node->getOperand(2)->getValueType(0);
3292  switch (IntNo) {
3293  default:
3294  break;
3295  case Intrinsic::aarch64_neon_st1x2: {
3296  if (VT == MVT::v8i8) {
3297  SelectStore(Node, 2, AArch64::ST1Twov8b);
3298  return;
3299  } else if (VT == MVT::v16i8) {
3300  SelectStore(Node, 2, AArch64::ST1Twov16b);
3301  return;
3302  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3303  SelectStore(Node, 2, AArch64::ST1Twov4h);
3304  return;
3305  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3306  SelectStore(Node, 2, AArch64::ST1Twov8h);
3307  return;
3308  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3309  SelectStore(Node, 2, AArch64::ST1Twov2s);
3310  return;
3311  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3312  SelectStore(Node, 2, AArch64::ST1Twov4s);
3313  return;
3314  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3315  SelectStore(Node, 2, AArch64::ST1Twov2d);
3316  return;
3317  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3318  SelectStore(Node, 2, AArch64::ST1Twov1d);
3319  return;
3320  }
3321  break;
3322  }
3323  case Intrinsic::aarch64_neon_st1x3: {
3324  if (VT == MVT::v8i8) {
3325  SelectStore(Node, 3, AArch64::ST1Threev8b);
3326  return;
3327  } else if (VT == MVT::v16i8) {
3328  SelectStore(Node, 3, AArch64::ST1Threev16b);
3329  return;
3330  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3331  SelectStore(Node, 3, AArch64::ST1Threev4h);
3332  return;
3333  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3334  SelectStore(Node, 3, AArch64::ST1Threev8h);
3335  return;
3336  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3337  SelectStore(Node, 3, AArch64::ST1Threev2s);
3338  return;
3339  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3340  SelectStore(Node, 3, AArch64::ST1Threev4s);
3341  return;
3342  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3343  SelectStore(Node, 3, AArch64::ST1Threev2d);
3344  return;
3345  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3346  SelectStore(Node, 3, AArch64::ST1Threev1d);
3347  return;
3348  }
3349  break;
3350  }
3351  case Intrinsic::aarch64_neon_st1x4: {
3352  if (VT == MVT::v8i8) {
3353  SelectStore(Node, 4, AArch64::ST1Fourv8b);
3354  return;
3355  } else if (VT == MVT::v16i8) {
3356  SelectStore(Node, 4, AArch64::ST1Fourv16b);
3357  return;
3358  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3359  SelectStore(Node, 4, AArch64::ST1Fourv4h);
3360  return;
3361  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3362  SelectStore(Node, 4, AArch64::ST1Fourv8h);
3363  return;
3364  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3365  SelectStore(Node, 4, AArch64::ST1Fourv2s);
3366  return;
3367  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3368  SelectStore(Node, 4, AArch64::ST1Fourv4s);
3369  return;
3370  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3371  SelectStore(Node, 4, AArch64::ST1Fourv2d);
3372  return;
3373  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3374  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3375  return;
3376  }
3377  break;
3378  }
3379  case Intrinsic::aarch64_neon_st2: {
3380  if (VT == MVT::v8i8) {
3381  SelectStore(Node, 2, AArch64::ST2Twov8b);
3382  return;
3383  } else if (VT == MVT::v16i8) {
3384  SelectStore(Node, 2, AArch64::ST2Twov16b);
3385  return;
3386  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3387  SelectStore(Node, 2, AArch64::ST2Twov4h);
3388  return;
3389  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3390  SelectStore(Node, 2, AArch64::ST2Twov8h);
3391  return;
3392  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3393  SelectStore(Node, 2, AArch64::ST2Twov2s);
3394  return;
3395  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3396  SelectStore(Node, 2, AArch64::ST2Twov4s);
3397  return;
3398  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3399  SelectStore(Node, 2, AArch64::ST2Twov2d);
3400  return;
3401  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3402  SelectStore(Node, 2, AArch64::ST1Twov1d);
3403  return;
3404  }
3405  break;
3406  }
3407  case Intrinsic::aarch64_neon_st3: {
3408  if (VT == MVT::v8i8) {
3409  SelectStore(Node, 3, AArch64::ST3Threev8b);
3410  return;
3411  } else if (VT == MVT::v16i8) {
3412  SelectStore(Node, 3, AArch64::ST3Threev16b);
3413  return;
3414  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3415  SelectStore(Node, 3, AArch64::ST3Threev4h);
3416  return;
3417  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3418  SelectStore(Node, 3, AArch64::ST3Threev8h);
3419  return;
3420  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3421  SelectStore(Node, 3, AArch64::ST3Threev2s);
3422  return;
3423  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3424  SelectStore(Node, 3, AArch64::ST3Threev4s);
3425  return;
3426  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3427  SelectStore(Node, 3, AArch64::ST3Threev2d);
3428  return;
3429  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3430  SelectStore(Node, 3, AArch64::ST1Threev1d);
3431  return;
3432  }
3433  break;
3434  }
3435  case Intrinsic::aarch64_neon_st4: {
3436  if (VT == MVT::v8i8) {
3437  SelectStore(Node, 4, AArch64::ST4Fourv8b);
3438  return;
3439  } else if (VT == MVT::v16i8) {
3440  SelectStore(Node, 4, AArch64::ST4Fourv16b);
3441  return;
3442  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3443  SelectStore(Node, 4, AArch64::ST4Fourv4h);
3444  return;
3445  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3446  SelectStore(Node, 4, AArch64::ST4Fourv8h);
3447  return;
3448  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3449  SelectStore(Node, 4, AArch64::ST4Fourv2s);
3450  return;
3451  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3452  SelectStore(Node, 4, AArch64::ST4Fourv4s);
3453  return;
3454  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3455  SelectStore(Node, 4, AArch64::ST4Fourv2d);
3456  return;
3457  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3458  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3459  return;
3460  }
3461  break;
3462  }
3463  case Intrinsic::aarch64_neon_st2lane: {
3464  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3465  SelectStoreLane(Node, 2, AArch64::ST2i8);
3466  return;
3467  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3468  VT == MVT::v8f16) {
3469  SelectStoreLane(Node, 2, AArch64::ST2i16);
3470  return;
3471  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3472  VT == MVT::v2f32) {
3473  SelectStoreLane(Node, 2, AArch64::ST2i32);
3474  return;
3475  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3476  VT == MVT::v1f64) {
3477  SelectStoreLane(Node, 2, AArch64::ST2i64);
3478  return;
3479  }
3480  break;
3481  }
3482  case Intrinsic::aarch64_neon_st3lane: {
3483  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3484  SelectStoreLane(Node, 3, AArch64::ST3i8);
3485  return;
3486  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3487  VT == MVT::v8f16) {
3488  SelectStoreLane(Node, 3, AArch64::ST3i16);
3489  return;
3490  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3491  VT == MVT::v2f32) {
3492  SelectStoreLane(Node, 3, AArch64::ST3i32);
3493  return;
3494  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3495  VT == MVT::v1f64) {
3496  SelectStoreLane(Node, 3, AArch64::ST3i64);
3497  return;
3498  }
3499  break;
3500  }
3501  case Intrinsic::aarch64_neon_st4lane: {
3502  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3503  SelectStoreLane(Node, 4, AArch64::ST4i8);
3504  return;
3505  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3506  VT == MVT::v8f16) {
3507  SelectStoreLane(Node, 4, AArch64::ST4i16);
3508  return;
3509  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3510  VT == MVT::v2f32) {
3511  SelectStoreLane(Node, 4, AArch64::ST4i32);
3512  return;
3513  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3514  VT == MVT::v1f64) {
3515  SelectStoreLane(Node, 4, AArch64::ST4i64);
3516  return;
3517  }
3518  break;
3519  }
3520  }
3521  break;
3522  }
3523  case AArch64ISD::LD2post: {
3524  if (VT == MVT::v8i8) {
3525  SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
3526  return;
3527  } else if (VT == MVT::v16i8) {
3528  SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
3529  return;
3530  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3531  SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
3532  return;
3533  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3534  SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
3535  return;
3536  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3537  SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
3538  return;
3539  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3540  SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
3541  return;
3542  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3543  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3544  return;
3545  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3546  SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
3547  return;
3548  }
3549  break;
3550  }
3551  case AArch64ISD::LD3post: {
3552  if (VT == MVT::v8i8) {
3553  SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
3554  return;
3555  } else if (VT == MVT::v16i8) {
3556  SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
3557  return;
3558  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3559  SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
3560  return;
3561  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3562  SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
3563  return;
3564  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3565  SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
3566  return;
3567  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3568  SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
3569  return;
3570  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3571  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3572  return;
3573  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3574  SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
3575  return;
3576  }
3577  break;
3578  }
3579  case AArch64ISD::LD4post: {
3580  if (VT == MVT::v8i8) {
3581  SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
3582  return;
3583  } else if (VT == MVT::v16i8) {
3584  SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
3585  return;
3586  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3587  SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
3588  return;
3589  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3590  SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
3591  return;
3592  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3593  SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
3594  return;
3595  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3596  SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
3597  return;
3598  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3599  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3600  return;
3601  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3602  SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
3603  return;
3604  }
3605  break;
3606  }
3607  case AArch64ISD::LD1x2post: {
3608  if (VT == MVT::v8i8) {
3609  SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
3610  return;
3611  } else if (VT == MVT::v16i8) {
3612  SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
3613  return;
3614  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3615  SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
3616  return;
3617  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3618  SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
3619  return;
3620  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3621  SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
3622  return;
3623  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3624  SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
3625  return;
3626  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3627  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3628  return;
3629  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3630  SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
3631  return;
3632  }
3633  break;
3634  }
3635  case AArch64ISD::LD1x3post: {
3636  if (VT == MVT::v8i8) {
3637  SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
3638  return;
3639  } else if (VT == MVT::v16i8) {
3640  SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
3641  return;
3642  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3643  SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
3644  return;
3645  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3646  SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
3647  return;
3648  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3649  SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
3650  return;
3651  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3652  SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
3653  return;
3654  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3655  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3656  return;
3657  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3658  SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
3659  return;
3660  }
3661  break;
3662  }
3663  case AArch64ISD::LD1x4post: {
3664  if (VT == MVT::v8i8) {
3665  SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
3666  return;
3667  } else if (VT == MVT::v16i8) {
3668  SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
3669  return;
3670  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3671  SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
3672  return;
3673  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3674  SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
3675  return;
3676  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3677  SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
3678  return;
3679  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3680  SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
3681  return;
3682  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3683  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3684  return;
3685  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3686  SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
3687  return;
3688  }
3689  break;
3690  }
3691  case AArch64ISD::LD1DUPpost: {
3692  if (VT == MVT::v8i8) {
3693  SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
3694  return;
3695  } else if (VT == MVT::v16i8) {
3696  SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
3697  return;
3698  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3699  SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
3700  return;
3701  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3702  SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
3703  return;
3704  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3705  SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
3706  return;
3707  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3708  SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
3709  return;
3710  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3711  SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
3712  return;
3713  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3714  SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
3715  return;
3716  }
3717  break;
3718  }
3719  case AArch64ISD::LD2DUPpost: {
3720  if (VT == MVT::v8i8) {
3721  SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
3722  return;
3723  } else if (VT == MVT::v16i8) {
3724  SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
3725  return;
3726  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3727  SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
3728  return;
3729  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3730  SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
3731  return;
3732  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3733  SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
3734  return;
3735  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3736  SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
3737  return;
3738  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3739  SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
3740  return;
3741  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3742  SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
3743  return;
3744  }
3745  break;
3746  }
3747  case AArch64ISD::LD3DUPpost: {
3748  if (VT == MVT::v8i8) {
3749  SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
3750  return;
3751  } else if (VT == MVT::v16i8) {
3752  SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
3753  return;
3754  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3755  SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
3756  return;
3757  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3758  SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
3759  return;
3760  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3761  SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
3762  return;
3763  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3764  SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
3765  return;
3766  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3767  SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
3768  return;
3769  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3770  SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
3771  return;
3772  }
3773  break;
3774  }
3775  case AArch64ISD::LD4DUPpost: {
3776  if (VT == MVT::v8i8) {
3777  SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
3778  return;
3779  } else if (VT == MVT::v16i8) {
3780  SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
3781  return;
3782  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3783  SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
3784  return;
3785  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3786  SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
3787  return;
3788  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3789  SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
3790  return;
3791  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3792  SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
3793  return;
3794  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3795  SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
3796  return;
3797  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3798  SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
3799  return;
3800  }
3801  break;
3802  }
3803  case AArch64ISD::LD1LANEpost: {
3804  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3805  SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
3806  return;
3807  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3808  VT == MVT::v8f16) {
3809  SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
3810  return;
3811  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3812  VT == MVT::v2f32) {
3813  SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
3814  return;
3815  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3816  VT == MVT::v1f64) {
3817  SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
3818  return;
3819  }
3820  break;
3821  }
3822  case AArch64ISD::LD2LANEpost: {
3823  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3824  SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
3825  return;
3826  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3827  VT == MVT::v8f16) {
3828  SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
3829  return;
3830  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3831  VT == MVT::v2f32) {
3832  SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
3833  return;
3834  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3835  VT == MVT::v1f64) {
3836  SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
3837  return;
3838  }
3839  break;
3840  }
3841  case AArch64ISD::LD3LANEpost: {
3842  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3843  SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
3844  return;
3845  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3846  VT == MVT::v8f16) {
3847  SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
3848  return;
3849  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3850  VT == MVT::v2f32) {
3851  SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
3852  return;
3853  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3854  VT == MVT::v1f64) {
3855  SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
3856  return;
3857  }
3858  break;
3859  }
3860  case AArch64ISD::LD4LANEpost: {
3861  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3862  SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
3863  return;
3864  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3865  VT == MVT::v8f16) {
3866  SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
3867  return;
3868  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3869  VT == MVT::v2f32) {
3870  SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
3871  return;
3872  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3873  VT == MVT::v1f64) {
3874  SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
3875  return;
3876  }
3877  break;
3878  }
3879  case AArch64ISD::ST2post: {
3880  VT = Node->getOperand(1).getValueType();
3881  if (VT == MVT::v8i8) {
3882  SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
3883  return;
3884  } else if (VT == MVT::v16i8) {
3885  SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
3886  return;
3887  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3888  SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
3889  return;
3890  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3891  SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
3892  return;
3893  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3894  SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
3895  return;
3896  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3897  SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
3898  return;
3899  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3900  SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
3901  return;
3902  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3903  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3904  return;
3905  }
3906  break;
3907  }
3908  case AArch64ISD::ST3post: {
3909  VT = Node->getOperand(1).getValueType();
3910  if (VT == MVT::v8i8) {
3911  SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
3912  return;
3913  } else if (VT == MVT::v16i8) {
3914  SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
3915  return;
3916  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3917  SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
3918  return;
3919  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3920  SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
3921  return;
3922  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3923  SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
3924  return;
3925  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3926  SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
3927  return;
3928  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3929  SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
3930  return;
3931  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3932  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
3933  return;
3934  }
3935  break;
3936  }
3937  case AArch64ISD::ST4post: {
3938  VT = Node->getOperand(1).getValueType();
3939  if (VT == MVT::v8i8) {
3940  SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
3941  return;
3942  } else if (VT == MVT::v16i8) {
3943  SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
3944  return;
3945  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3946  SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
3947  return;
3948  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3949  SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
3950  return;
3951  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3952  SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
3953  return;
3954  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3955  SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
3956  return;
3957  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3958  SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
3959  return;
3960  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3961  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
3962  return;
3963  }
3964  break;
3965  }
3966  case AArch64ISD::ST1x2post: {
3967  VT = Node->getOperand(1).getValueType();
3968  if (VT == MVT::v8i8) {
3969  SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
3970  return;
3971  } else if (VT == MVT::v16i8) {
3972  SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
3973  return;
3974  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3975  SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
3976  return;
3977  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3978  SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
3979  return;
3980  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3981  SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
3982  return;
3983  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3984  SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
3985  return;
3986  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3987  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3988  return;
3989  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3990  SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
3991  return;
3992  }
3993  break;
3994  }
3995  case AArch64ISD::ST1x3post: {
3996  VT = Node->getOperand(1).getValueType();
3997  if (VT == MVT::v8i8) {
3998  SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
3999  return;
4000  } else if (VT == MVT::v16i8) {
4001  SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
4002  return;
4003  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4004  SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
4005  return;
4006  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4007  SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
4008  return;
4009  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4010  SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
4011  return;
4012  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4013  SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
4014  return;
4015  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4016  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4017  return;
4018  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4019  SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
4020  return;
4021  }
4022  break;
4023  }
4024  case AArch64ISD::ST1x4post: {
4025  VT = Node->getOperand(1).getValueType();
4026  if (VT == MVT::v8i8) {
4027  SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
4028  return;
4029  } else if (VT == MVT::v16i8) {
4030  SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
4031  return;
4032  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4033  SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
4034  return;
4035  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4036  SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
4037  return;
4038  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4039  SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
4040  return;
4041  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4042  SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
4043  return;
4044  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4045  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4046  return;
4047  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4048  SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
4049  return;
4050  }
4051  break;
4052  }
4053  case AArch64ISD::ST2LANEpost: {
4054  VT = Node->getOperand(1).getValueType();
4055  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4056  SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
4057  return;
4058  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4059  VT == MVT::v8f16) {
4060  SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
4061  return;
4062  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4063  VT == MVT::v2f32) {
4064  SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
4065  return;
4066  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4067  VT == MVT::v1f64) {
4068  SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
4069  return;
4070  }
4071  break;
4072  }
4073  case AArch64ISD::ST3LANEpost: {
4074  VT = Node->getOperand(1).getValueType();
4075  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4076  SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
4077  return;
4078  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4079  VT == MVT::v8f16) {
4080  SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
4081  return;
4082  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4083  VT == MVT::v2f32) {
4084  SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
4085  return;
4086  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4087  VT == MVT::v1f64) {
4088  SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
4089  return;
4090  }
4091  break;
4092  }
4093  case AArch64ISD::ST4LANEpost: {
4094  VT = Node->getOperand(1).getValueType();
4095  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4096  SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
4097  return;
4098  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4099  VT == MVT::v8f16) {
4100  SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
4101  return;
4102  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4103  VT == MVT::v2f32) {
4104  SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
4105  return;
4106  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4107  VT == MVT::v1f64) {
4108  SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
4109  return;
4110  }
4111  break;
4112  }
4113  }
4114 
4115  // Select the default instruction
4116  SelectCode(Node);
4117 }
4118 
4119 /// createAArch64ISelDag - This pass converts a legalized DAG into an
4120 /// AArch64-specific DAG, ready for instruction scheduling.
4121 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
4122  CodeGenOpt::Level OptLevel) {
4123  return new AArch64DAGToDAGISel(TM, OptLevel);
4124 }
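
The factory function above is how the rest of the AArch64 backend obtains this selector pass. As a rough, non-verbatim sketch (the real hook lives in AArch64TargetMachine.cpp and also installs additional target-specific cleanup passes on some configurations), the target's pass configuration registers it during instruction-selection setup roughly as follows:

  // Illustrative sketch only: registering the DAG-to-DAG instruction selector
  // from the target's pass configuration. Simplified; not a verbatim excerpt.
  bool AArch64PassConfig::addInstSelector() {
    addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
    return false; // Selector installed; no error.
  }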