LLVM  8.0.1
R600ISelLowering.cpp
Go to the documentation of this file.
1 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// Custom DAG lowering for R600
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "R600ISelLowering.h"
16 #include "AMDGPUFrameLowering.h"
17 #include "AMDGPUSubtarget.h"
18 #include "R600Defines.h"
19 #include "R600FrameLowering.h"
20 #include "R600InstrInfo.h"
23 #include "Utils/AMDGPUBaseInfo.h"
24 #include "llvm/ADT/APFloat.h"
25 #include "llvm/ADT/APInt.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DerivedTypes.h"
41 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/Compiler.h"
45 #include <cassert>
46 #include <cstdint>
47 #include <iterator>
48 #include <utility>
49 #include <vector>
50 
51 using namespace llvm;
52 
53 #include "R600GenCallingConv.inc"
54 
56  const R600Subtarget &STI)
57  : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
58  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
59  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
60  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
61  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
62  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
63  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
64 
66 
67  // Legalize loads and stores to the private address space.
71 
72  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
73  // spaces, so it is custom lowered to handle those where it isn't.
74  for (MVT VT : MVT::integer_valuetypes()) {
78 
82 
86  }
87 
88  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
92 
96 
101 
104  // We need to include these since trunc STORES to PRIVATE need
105  // special handling to accommodate RMW
116 
117  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
120 
121  // Set condition code actions
134 
139 
142 
145 
149 
151 
156 
159 
166 
171 
172  // ADD, SUB overflow.
173  // TODO: turn these into Legal?
174  if (Subtarget->hasCARRY())
176 
177  if (Subtarget->hasBORROW())
179 
180  // Expand sign extension of vectors
181  if (!Subtarget->hasBFE())
183 
186 
187  if (!Subtarget->hasBFE())
191 
192  if (!Subtarget->hasBFE())
196 
200 
202 
204 
209 
214 
215  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
216  // to be Legal/Custom in order to avoid library calls.
220 
221  if (!Subtarget->hasFMA()) {
224  }
225 
226  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
227  // need it for R600.
228  if (!Subtarget->hasFP32Denormals())
230 
231  if (!Subtarget->hasBFI()) {
232  // fcopysign can be done in a single instruction with BFI.
235  }
236 
237  if (!Subtarget->hasBCNT(32))
239 
240  if (!Subtarget->hasBCNT(64))
242 
243  if (Subtarget->hasFFBH())
245 
246  if (Subtarget->hasFFBL())
248 
249  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
250  // need it for R600.
251  if (Subtarget->hasBFE())
252  setHasExtractBitsInsn(true);
253 
255 
256  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
257  for (MVT VT : ScalarIntVTs) {
262  }
263 
264  // LLVM will expand these to atomic_cmp_swap(0)
265  // and atomic_swap, respectively.
268 
269  // We need to custom lower some of the intrinsics
272 
274 
281 }
282 
283 static inline bool isEOP(MachineBasicBlock::iterator I) {
284  if (std::next(I) == I->getParent()->end())
285  return false;
286  return std::next(I)->getOpcode() == R600::RETURN;
287 }
288 
291  MachineBasicBlock *BB) const {
292  MachineFunction *MF = BB->getParent();
295  const R600InstrInfo *TII = Subtarget->getInstrInfo();
296 
297  switch (MI.getOpcode()) {
298  default:
299  // Replace LDS_*_RET instruction that don't have any uses with the
300  // equivalent LDS_*_NORET instruction.
301  if (TII->isLDSRetInstr(MI.getOpcode())) {
302  int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
303  assert(DstIdx != -1);
304  MachineInstrBuilder NewMI;
305  // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
306  // LDS_1A2D support and remove this special case.
307  if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
308  MI.getOpcode() == R600::LDS_CMPST_RET)
309  return BB;
310 
311  NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
312  TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
313  for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
314  NewMI.add(MI.getOperand(i));
315  }
316  } else {
318  }
319  break;
320 
321  case R600::FABS_R600: {
323  *BB, I, R600::MOV, MI.getOperand(0).getReg(),
324  MI.getOperand(1).getReg());
325  TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
326  break;
327  }
328 
329  case R600::FNEG_R600: {
331  *BB, I, R600::MOV, MI.getOperand(0).getReg(),
332  MI.getOperand(1).getReg());
333  TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
334  break;
335  }
336 
337  case R600::MASK_WRITE: {
338  unsigned maskedRegister = MI.getOperand(0).getReg();
340  MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
341  TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
342  break;
343  }
344 
345  case R600::MOV_IMM_F32:
346  TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
347  .getFPImm()
348  ->getValueAPF()
349  .bitcastToAPInt()
350  .getZExtValue());
351  break;
352 
353  case R600::MOV_IMM_I32:
354  TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
355  MI.getOperand(1).getImm());
356  break;
357 
358  case R600::MOV_IMM_GLOBAL_ADDR: {
359  //TODO: Perhaps combine this instruction with the next if possible
360  auto MIB = TII->buildDefaultInstruction(
361  *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
362  int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
363  //TODO: Ugh this is rather ugly
364  MIB->getOperand(Idx) = MI.getOperand(1);
365  break;
366  }
367 
368  case R600::CONST_COPY: {
370  *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
371  TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
372  MI.getOperand(1).getImm());
373  break;
374  }
375 
376  case R600::RAT_WRITE_CACHELESS_32_eg:
377  case R600::RAT_WRITE_CACHELESS_64_eg:
378  case R600::RAT_WRITE_CACHELESS_128_eg:
379  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
380  .add(MI.getOperand(0))
381  .add(MI.getOperand(1))
382  .addImm(isEOP(I)); // Set End of program bit
383  break;
384 
385  case R600::RAT_STORE_TYPED_eg:
386  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
387  .add(MI.getOperand(0))
388  .add(MI.getOperand(1))
389  .add(MI.getOperand(2))
390  .addImm(isEOP(I)); // Set End of program bit
391  break;
392 
393  case R600::BRANCH:
394  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
395  .add(MI.getOperand(0));
396  break;
397 
398  case R600::BRANCH_COND_f32: {
399  MachineInstr *NewMI =
400  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
401  R600::PREDICATE_BIT)
402  .add(MI.getOperand(1))
403  .addImm(R600::PRED_SETNE)
404  .addImm(0); // Flags
405  TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
406  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
407  .add(MI.getOperand(0))
408  .addReg(R600::PREDICATE_BIT, RegState::Kill);
409  break;
410  }
411 
412  case R600::BRANCH_COND_i32: {
413  MachineInstr *NewMI =
414  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
415  R600::PREDICATE_BIT)
416  .add(MI.getOperand(1))
417  .addImm(R600::PRED_SETNE_INT)
418  .addImm(0); // Flags
419  TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
420  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
421  .add(MI.getOperand(0))
422  .addReg(R600::PREDICATE_BIT, RegState::Kill);
423  break;
424  }
425 
426  case R600::EG_ExportSwz:
427  case R600::R600_ExportSwz: {
428  // Instruction is left unmodified if its not the last one of its type
429  bool isLastInstructionOfItsType = true;
430  unsigned InstExportType = MI.getOperand(1).getImm();
431  for (MachineBasicBlock::iterator NextExportInst = std::next(I),
432  EndBlock = BB->end(); NextExportInst != EndBlock;
433  NextExportInst = std::next(NextExportInst)) {
434  if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
435  NextExportInst->getOpcode() == R600::R600_ExportSwz) {
436  unsigned CurrentInstExportType = NextExportInst->getOperand(1)
437  .getImm();
438  if (CurrentInstExportType == InstExportType) {
439  isLastInstructionOfItsType = false;
440  break;
441  }
442  }
443  }
444  bool EOP = isEOP(I);
445  if (!EOP && !isLastInstructionOfItsType)
446  return BB;
447  unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
448  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
449  .add(MI.getOperand(0))
450  .add(MI.getOperand(1))
451  .add(MI.getOperand(2))
452  .add(MI.getOperand(3))
453  .add(MI.getOperand(4))
454  .add(MI.getOperand(5))
455  .add(MI.getOperand(6))
456  .addImm(CfInst)
457  .addImm(EOP);
458  break;
459  }
460  case R600::RETURN: {
461  return BB;
462  }
463  }
464 
465  MI.eraseFromParent();
466  return BB;
467 }
468 
469 //===----------------------------------------------------------------------===//
470 // Custom DAG Lowering Operations
471 //===----------------------------------------------------------------------===//
472 
476  switch (Op.getOpcode()) {
477  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
478  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
479  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
480  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
481  case ISD::SRA_PARTS:
482  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
483  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
484  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
485  case ISD::FCOS:
486  case ISD::FSIN: return LowerTrig(Op, DAG);
487  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
488  case ISD::STORE: return LowerSTORE(Op, DAG);
489  case ISD::LOAD: {
490  SDValue Result = LowerLOAD(Op, DAG);
491  assert((!Result.getNode() ||
492  Result.getNode()->getNumValues() == 2) &&
493  "Load should return a value and a chain");
494  return Result;
495  }
496 
497  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
498  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
499  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
500  case ISD::INTRINSIC_VOID: {
501  SDValue Chain = Op.getOperand(0);
502  unsigned IntrinsicID =
503  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
504  switch (IntrinsicID) {
506  SDLoc DL(Op);
507  const SDValue Args[8] = {
508  Chain,
509  Op.getOperand(2), // Export Value
510  Op.getOperand(3), // ArrayBase
511  Op.getOperand(4), // Type
512  DAG.getConstant(0, DL, MVT::i32), // SWZ_X
513  DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
514  DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
515  DAG.getConstant(3, DL, MVT::i32) // SWZ_W
516  };
517  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
518  }
519 
520  // default for switch(IntrinsicID)
521  default: break;
522  }
523  // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
524  break;
525  }
527  unsigned IntrinsicID =
528  cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
529  EVT VT = Op.getValueType();
530  SDLoc DL(Op);
531  switch (IntrinsicID) {
532  case Intrinsic::r600_tex:
533  case Intrinsic::r600_texc: {
534  unsigned TextureOp;
535  switch (IntrinsicID) {
536  case Intrinsic::r600_tex:
537  TextureOp = 0;
538  break;
540  TextureOp = 1;
541  break;
542  default:
543  llvm_unreachable("unhandled texture operation");
544  }
545 
546  SDValue TexArgs[19] = {
547  DAG.getConstant(TextureOp, DL, MVT::i32),
548  Op.getOperand(1),
549  DAG.getConstant(0, DL, MVT::i32),
550  DAG.getConstant(1, DL, MVT::i32),
551  DAG.getConstant(2, DL, MVT::i32),
552  DAG.getConstant(3, DL, MVT::i32),
553  Op.getOperand(2),
554  Op.getOperand(3),
555  Op.getOperand(4),
556  DAG.getConstant(0, DL, MVT::i32),
557  DAG.getConstant(1, DL, MVT::i32),
558  DAG.getConstant(2, DL, MVT::i32),
559  DAG.getConstant(3, DL, MVT::i32),
560  Op.getOperand(5),
561  Op.getOperand(6),
562  Op.getOperand(7),
563  Op.getOperand(8),
564  Op.getOperand(9),
565  Op.getOperand(10)
566  };
567  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
568  }
569  case Intrinsic::r600_dot4: {
570  SDValue Args[8] = {
572  DAG.getConstant(0, DL, MVT::i32)),
574  DAG.getConstant(0, DL, MVT::i32)),
576  DAG.getConstant(1, DL, MVT::i32)),
578  DAG.getConstant(1, DL, MVT::i32)),
580  DAG.getConstant(2, DL, MVT::i32)),
582  DAG.getConstant(2, DL, MVT::i32)),
584  DAG.getConstant(3, DL, MVT::i32)),
586  DAG.getConstant(3, DL, MVT::i32))
587  };
588  return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
589  }
590 
594  return DAG.getConstant(ByteOffset, DL, PtrVT);
595  }
597  return LowerImplicitParameter(DAG, VT, DL, 0);
599  return LowerImplicitParameter(DAG, VT, DL, 1);
601  return LowerImplicitParameter(DAG, VT, DL, 2);
603  return LowerImplicitParameter(DAG, VT, DL, 3);
605  return LowerImplicitParameter(DAG, VT, DL, 4);
607  return LowerImplicitParameter(DAG, VT, DL, 5);
609  return LowerImplicitParameter(DAG, VT, DL, 6);
611  return LowerImplicitParameter(DAG, VT, DL, 7);
613  return LowerImplicitParameter(DAG, VT, DL, 8);
614 
616  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
617  R600::T1_X, VT);
619  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
620  R600::T1_Y, VT);
622  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
623  R600::T1_Z, VT);
625  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
626  R600::T0_X, VT);
628  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
629  R600::T0_Y, VT);
631  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
632  R600::T0_Z, VT);
633 
635  return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
636 
638  return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
639  default:
640  return Op;
641  }
642 
643  // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
644  break;
645  }
646  } // end switch(Op.getOpcode())
647  return SDValue();
648 }
649 
652  SelectionDAG &DAG) const {
653  switch (N->getOpcode()) {
654  default:
656  return;
657  case ISD::FP_TO_UINT:
658  if (N->getValueType(0) == MVT::i1) {
659  Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
660  return;
661  }
662  // Since we don't care about out of bounds values we can use FP_TO_SINT for
663  // uints too. The DAGLegalizer code for uint considers some extra cases
664  // which are not necessary here.
666  case ISD::FP_TO_SINT: {
667  if (N->getValueType(0) == MVT::i1) {
668  Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
669  return;
670  }
671 
672  SDValue Result;
673  if (expandFP_TO_SINT(N, Result, DAG))
674  Results.push_back(Result);
675  return;
676  }
677  case ISD::SDIVREM: {
678  SDValue Op = SDValue(N, 1);
679  SDValue RES = LowerSDIVREM(Op, DAG);
680  Results.push_back(RES);
681  Results.push_back(RES.getValue(1));
682  break;
683  }
684  case ISD::UDIVREM: {
685  SDValue Op = SDValue(N, 0);
686  LowerUDIVREM64(Op, DAG, Results);
687  break;
688  }
689  }
690 }
691 
692 SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
693  SDValue Vector) const {
694  SDLoc DL(Vector);
695  EVT VecVT = Vector.getValueType();
696  EVT EltVT = VecVT.getVectorElementType();
698 
699  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
700  Args.push_back(DAG.getNode(
701  ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
702  DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
703  }
704 
705  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
706 }
707 
708 SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
709  SelectionDAG &DAG) const {
710  SDLoc DL(Op);
711  SDValue Vector = Op.getOperand(0);
712  SDValue Index = Op.getOperand(1);
713 
714  if (isa<ConstantSDNode>(Index) ||
716  return Op;
717 
718  Vector = vectorToVerticalVector(DAG, Vector);
719  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
720  Vector, Index);
721 }
722 
723 SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
724  SelectionDAG &DAG) const {
725  SDLoc DL(Op);
726  SDValue Vector = Op.getOperand(0);
727  SDValue Value = Op.getOperand(1);
728  SDValue Index = Op.getOperand(2);
729 
730  if (isa<ConstantSDNode>(Index) ||
732  return Op;
733 
734  Vector = vectorToVerticalVector(DAG, Vector);
735  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
736  Vector, Value, Index);
737  return vectorToVerticalVector(DAG, Insert);
738 }
739 
740 SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
741  SDValue Op,
742  SelectionDAG &DAG) const {
743  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
745  return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
746 
747  const DataLayout &DL = DAG.getDataLayout();
748  const GlobalValue *GV = GSD->getGlobal();
749  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
750 
751  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
752  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
753 }
754 
755 SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
756  // On hw >= R700, COS/SIN input must be between -1. and 1.
757  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
758  EVT VT = Op.getValueType();
759  SDValue Arg = Op.getOperand(0);
760  SDLoc DL(Op);
761 
762  // TODO: Should this propagate fast-math-flags?
763  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
764  DAG.getNode(ISD::FADD, DL, VT,
765  DAG.getNode(ISD::FMUL, DL, VT, Arg,
766  DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
767  DAG.getConstantFP(0.5, DL, MVT::f32)));
768  unsigned TrigNode;
769  switch (Op.getOpcode()) {
770  case ISD::FCOS:
771  TrigNode = AMDGPUISD::COS_HW;
772  break;
773  case ISD::FSIN:
774  TrigNode = AMDGPUISD::SIN_HW;
775  break;
776  default:
777  llvm_unreachable("Wrong trig opcode");
778  }
779  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
780  DAG.getNode(ISD::FADD, DL, VT, FractPart,
781  DAG.getConstantFP(-0.5, DL, MVT::f32)));
782  if (Gen >= AMDGPUSubtarget::R700)
783  return TrigVal;
784  // On R600 hw, COS/SIN input must be between -Pi and Pi.
785  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
786  DAG.getConstantFP(3.14159265359, DL, MVT::f32));
787 }
788 
789 SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
790  SDLoc DL(Op);
791  EVT VT = Op.getValueType();
792 
793  SDValue Lo = Op.getOperand(0);
794  SDValue Hi = Op.getOperand(1);
795  SDValue Shift = Op.getOperand(2);
796  SDValue Zero = DAG.getConstant(0, DL, VT);
797  SDValue One = DAG.getConstant(1, DL, VT);
798 
799  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
800  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
801  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
802  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
803 
804  // The dance around Width1 is necessary for 0 special case.
805  // Without it the CompShift might be 32, producing incorrect results in
806  // Overflow. So we do the shift in two steps, the alternative is to
807  // add a conditional to filter the special case.
808 
809  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
810  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
811 
812  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
813  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
814  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
815 
816  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
817  SDValue LoBig = Zero;
818 
819  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
820  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
821 
822  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
823 }
824 
825 SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
826  SDLoc DL(Op);
827  EVT VT = Op.getValueType();
828 
829  SDValue Lo = Op.getOperand(0);
830  SDValue Hi = Op.getOperand(1);
831  SDValue Shift = Op.getOperand(2);
832  SDValue Zero = DAG.getConstant(0, DL, VT);
833  SDValue One = DAG.getConstant(1, DL, VT);
834 
835  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
836 
837  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
838  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
839  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
840  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
841 
842  // The dance around Width1 is necessary for 0 special case.
843  // Without it the CompShift might be 32, producing incorrect results in
844  // Overflow. So we do the shift in two steps, the alternative is to
845  // add a conditional to filter the special case.
846 
847  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
848  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
849 
850  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
851  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
852  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
853 
854  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
855  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
856 
857  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
858  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
859 
860  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
861 }
862 
863 SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
864  unsigned mainop, unsigned ovf) const {
865  SDLoc DL(Op);
866  EVT VT = Op.getValueType();
867 
868  SDValue Lo = Op.getOperand(0);
869  SDValue Hi = Op.getOperand(1);
870 
871  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
872  // Extend sign.
873  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
874  DAG.getValueType(MVT::i1));
875 
876  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
877 
878  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
879 }
880 
881 SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
882  SDLoc DL(Op);
883  return DAG.getNode(
884  ISD::SETCC,
885  DL,
886  MVT::i1,
887  Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
888  DAG.getCondCode(ISD::SETEQ));
889 }
890 
891 SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
892  SDLoc DL(Op);
893  return DAG.getNode(
894  ISD::SETCC,
895  DL,
896  MVT::i1,
897  Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
898  DAG.getCondCode(ISD::SETEQ));
899 }
900 
901 SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
902  const SDLoc &DL,
903  unsigned DwordOffset) const {
904  unsigned ByteOffset = DwordOffset * 4;
905  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
907 
908  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
909  assert(isInt<16>(ByteOffset));
910 
911  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
912  DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
914 }
915 
916 bool R600TargetLowering::isZero(SDValue Op) const {
917  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
918  return Cst->isNullValue();
919  } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
920  return CstFP->isZero();
921  } else {
922  return false;
923  }
924 }
925 
926 bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
927  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
928  return CFP->isExactlyValue(1.0);
929  }
930  return isAllOnesConstant(Op);
931 }
932 
933 bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
934  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
935  return CFP->getValueAPF().isZero();
936  }
937  return isNullConstant(Op);
938 }
939 
/// Lower SELECT_CC to something the R600 hardware can match natively (a SET*
/// or CND* instruction), rewriting operands and condition codes as needed;
/// otherwise split it into two supported SELECT_CC operations.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // For f32 selects, first try to fold into a legacy min/max node.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  // If True/False are reversed (HW-false in the True slot), invert the
  // condition code -- or invert-and-swap the compare operands -- so the
  // HW-true value ends up in the True slot with a legal condition code.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      // (inverting the condition also requires swapping True/False).
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // Not-equal conditions are rewritten as their inverse (equality) with
    // the True/False operands exchanged.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1081 
1082 /// LLVM generates byte-addressed pointers. For indirect addressing, we need to
1083 /// convert these pointers to a register index. Each register holds
1084 /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1085 /// \p StackWidth, which tells us how many of the 4 sub-registers will be used
1086 /// for indirect addressing.
1087 SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1088  unsigned StackWidth,
1089  SelectionDAG &DAG) const {
1090  unsigned SRLPad;
1091  switch(StackWidth) {
1092  case 1:
1093  SRLPad = 2;
1094  break;
1095  case 2:
1096  SRLPad = 3;
1097  break;
1098  case 4:
1099  SRLPad = 4;
1100  break;
1101  default: llvm_unreachable("Invalid stack width");
1102  }
1103 
1104  SDLoc DL(Ptr);
1105  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1106  DAG.getConstant(SRLPad, DL, MVT::i32));
1107 }
1108 
1109 void R600TargetLowering::getStackAddress(unsigned StackWidth,
1110  unsigned ElemIdx,
1111  unsigned &Channel,
1112  unsigned &PtrIncr) const {
1113  switch (StackWidth) {
1114  default:
1115  case 1:
1116  Channel = 0;
1117  if (ElemIdx > 0) {
1118  PtrIncr = 1;
1119  } else {
1120  PtrIncr = 0;
1121  }
1122  break;
1123  case 2:
1124  Channel = ElemIdx % 2;
1125  if (ElemIdx == 2) {
1126  PtrIncr = 1;
1127  } else {
1128  PtrIncr = 0;
1129  }
1130  break;
1131  case 4:
1132  Channel = ElemIdx;
1133  PtrIncr = 0;
1134  break;
1135  }
1136 }
1137 
1138 SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1139  SelectionDAG &DAG) const {
1140  SDLoc DL(Store);
1141  //TODO: Who creates the i8 stores?
1142  assert(Store->isTruncatingStore()
1143  || Store->getValue().getValueType() == MVT::i8);
1145 
1146  SDValue Mask;
1147  if (Store->getMemoryVT() == MVT::i8) {
1148  assert(Store->getAlignment() >= 1);
1149  Mask = DAG.getConstant(0xff, DL, MVT::i32);
1150  } else if (Store->getMemoryVT() == MVT::i16) {
1151  assert(Store->getAlignment() >= 2);
1152  Mask = DAG.getConstant(0xffff, DL, MVT::i32);
1153  } else {
1154  llvm_unreachable("Unsupported private trunc store");
1155  }
1156 
1157  SDValue OldChain = Store->getChain();
1158  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
1159  // Skip dummy
1160  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
1161  SDValue BasePtr = Store->getBasePtr();
1162  SDValue Offset = Store->getOffset();
1163  EVT MemVT = Store->getMemoryVT();
1164 
1165  SDValue LoadPtr = BasePtr;
1166  if (!Offset.isUndef()) {
1167  LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1168  }
1169 
1170  // Get dword location
1171  // TODO: this should be eliminated by the future SHR ptr, 2
1172  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1173  DAG.getConstant(0xfffffffc, DL, MVT::i32));
1174 
1175  // Load dword
1176  // TODO: can we be smarter about machine pointer info?
1179  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1180 
1181  Chain = Dst.getValue(1);
1182 
1183  // Get offset in dword
1184  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1185  DAG.getConstant(0x3, DL, MVT::i32));
1186 
1187  // Convert byte offset to bit shift
1188  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1189  DAG.getConstant(3, DL, MVT::i32));
1190 
1191  // TODO: Contrary to the name of the functiom,
1192  // it also handles sub i32 non-truncating stores (like i1)
1193  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1194  Store->getValue());
1195 
1196  // Mask the value to the right type
1197  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1198 
1199  // Shift the value in place
1200  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1201  MaskedValue, ShiftAmt);
1202 
1203  // Shift the mask in place
1204  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
1205 
1206  // Invert the mask. NOTE: if we had native ROL instructions we could
1207  // use inverted mask
1208  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
1209 
1210  // Cleanup the target bits
1211  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1212 
1213  // Add the new bits
1214  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1215 
1216  // Store dword
1217  // TODO: Can we be smarter about MachinePointerInfo?
1218  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);
1219 
1220  // If we are part of expanded vector, make our neighbors depend on this store
1221  if (VectorTrunc) {
1222  // Make all other vector elements depend on this store
1223  Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
1224  DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
1225  }
1226  return NewStore;
1227 }
1228 
1229 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1230  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1231  unsigned AS = StoreNode->getAddressSpace();
1232 
1233  SDValue Chain = StoreNode->getChain();
1234  SDValue Ptr = StoreNode->getBasePtr();
1235  SDValue Value = StoreNode->getValue();
1236 
1237  EVT VT = Value.getValueType();
1238  EVT MemVT = StoreNode->getMemoryVT();
1239  EVT PtrVT = Ptr.getValueType();
1240 
1241  SDLoc DL(Op);
1242 
1243  // Neither LOCAL nor PRIVATE can do vectors at the moment
1244  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
1245  VT.isVector()) {
1246  if ((AS == AMDGPUAS::PRIVATE_ADDRESS) &&
1247  StoreNode->isTruncatingStore()) {
1248  // Add an extra level of chain to isolate this vector
1249  SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
1250  // TODO: can the chain be replaced without creating a new store?
1251  SDValue NewStore = DAG.getTruncStore(
1252  NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
1253  MemVT, StoreNode->getAlignment(),
1254  StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
1255  StoreNode = cast<StoreSDNode>(NewStore);
1256  }
1257 
1258  return scalarizeVectorStore(StoreNode, DAG);
1259  }
1260 
1261  unsigned Align = StoreNode->getAlignment();
1262  if (Align < MemVT.getStoreSize() &&
1263  !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
1264  return expandUnalignedStore(StoreNode, DAG);
1265  }
1266 
1267  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
1268  DAG.getConstant(2, DL, PtrVT));
1269 
1270  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
1271  // It is beneficial to create MSKOR here instead of combiner to avoid
1272  // artificial dependencies introduced by RMW
1273  if (StoreNode->isTruncatingStore()) {
1274  assert(VT.bitsLE(MVT::i32));
1275  SDValue MaskConstant;
1276  if (MemVT == MVT::i8) {
1277  MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
1278  } else {
1279  assert(MemVT == MVT::i16);
1280  assert(StoreNode->getAlignment() >= 2);
1281  MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
1282  }
1283 
1284  SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
1285  DAG.getConstant(0x00000003, DL, PtrVT));
1286  SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1287  DAG.getConstant(3, DL, VT));
1288 
1289  // Put the mask in correct place
1290  SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
1291 
1292  // Put the value bits in correct place
1293  SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1294  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
1295 
1296  // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1297  // vector instead.
1298  SDValue Src[4] = {
1299  ShiftedValue,
1300  DAG.getConstant(0, DL, MVT::i32),
1301  DAG.getConstant(0, DL, MVT::i32),
1302  Mask
1303  };
1304  SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
1305  SDValue Args[3] = { Chain, Input, DWordAddr };
1307  Op->getVTList(), Args, MemVT,
1308  StoreNode->getMemOperand());
1309  } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
1310  // Convert pointer from byte address to dword address.
1311  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1312 
1313  if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1314  llvm_unreachable("Truncated and indexed stores not supported yet");
1315  } else {
1316  Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1317  }
1318  return Chain;
1319  }
1320  }
1321 
1322  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
1323  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
1324  return SDValue();
1325 
1326  if (MemVT.bitsLT(MVT::i32))
1327  return lowerPrivateTruncStore(StoreNode, DAG);
1328 
1329  // Standard i32+ store, tag it with DWORDADDR to note that the address
1330  // has been shifted
1331  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1332  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1333  return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1334  }
1335 
1336  // Tagged i32+ stores will be matched by patterns
1337  return SDValue();
1338 }
1339 
1340 // return (512 + (kc_bank << 12)
1341 static int
1343  switch (AddressSpace) {
1345  return 512;
1347  return 512 + 4096;
1349  return 512 + 4096 * 2;
1351  return 512 + 4096 * 3;
1353  return 512 + 4096 * 4;
1355  return 512 + 4096 * 5;
1357  return 512 + 4096 * 6;
1359  return 512 + 4096 * 7;
1361  return 512 + 4096 * 8;
1363  return 512 + 4096 * 9;
1365  return 512 + 4096 * 10;
1367  return 512 + 4096 * 11;
1369  return 512 + 4096 * 12;
1371  return 512 + 4096 * 13;
1373  return 512 + 4096 * 14;
1375  return 512 + 4096 * 15;
1376  default:
1377  return -1;
1378  }
1379 }
1380 
1381 SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1382  SelectionDAG &DAG) const {
1383  SDLoc DL(Op);
1384  LoadSDNode *Load = cast<LoadSDNode>(Op);
1386  EVT MemVT = Load->getMemoryVT();
1387  assert(Load->getAlignment() >= MemVT.getStoreSize());
1388 
1389  SDValue BasePtr = Load->getBasePtr();
1390  SDValue Chain = Load->getChain();
1391  SDValue Offset = Load->getOffset();
1392 
1393  SDValue LoadPtr = BasePtr;
1394  if (!Offset.isUndef()) {
1395  LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1396  }
1397 
1398  // Get dword location
1399  // NOTE: this should be eliminated by the future SHR ptr, 2
1400  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1401  DAG.getConstant(0xfffffffc, DL, MVT::i32));
1402 
1403  // Load dword
1404  // TODO: can we be smarter about machine pointer info?
1407  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1408 
1409  // Get offset within the register.
1410  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1411  LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
1412 
1413  // Bit offset of target byte (byteIdx * 8).
1414  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1415  DAG.getConstant(3, DL, MVT::i32));
1416 
1417  // Shift to the right.
1418  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
1419 
1420  // Eliminate the upper bits by setting them to ...
1421  EVT MemEltVT = MemVT.getScalarType();
1422 
1423  if (ExtType == ISD::SEXTLOAD) { // ... ones.
1424  SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1425  Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
1426  } else { // ... or zeros.
1427  Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
1428  }
1429 
1430  SDValue Ops[] = {
1431  Ret,
1432  Read.getValue(1) // This should be our output chain
1433  };
1434 
1435  return DAG.getMergeValues(Ops, DL);
1436 }
1437 
1438 SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1439  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1440  unsigned AS = LoadNode->getAddressSpace();
1441  EVT MemVT = LoadNode->getMemoryVT();
1443 
1444  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1445  ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1446  return lowerPrivateExtLoad(Op, DAG);
1447  }
1448 
1449  SDLoc DL(Op);
1450  EVT VT = Op.getValueType();
1451  SDValue Chain = LoadNode->getChain();
1452  SDValue Ptr = LoadNode->getBasePtr();
1453 
1454  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1455  LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
1456  VT.isVector()) {
1457  return scalarizeVectorLoad(LoadNode, DAG);
1458  }
1459 
1460  // This is still used for explicit load from addrspace(8)
1461  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1462  if (ConstantBlock > -1 &&
1463  ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1464  (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
1465  SDValue Result;
1466  if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
1467  isa<ConstantSDNode>(Ptr)) {
1468  return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
1469  } else {
1470  //TODO: Does this even work?
1471  // non-constant ptr can't be folded, keeps it as a v4f32 load
1472  Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
1473  DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1474  DAG.getConstant(4, DL, MVT::i32)),
1475  DAG.getConstant(LoadNode->getAddressSpace() -
1477  );
1478  }
1479 
1480  if (!VT.isVector()) {
1481  Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1482  DAG.getConstant(0, DL, MVT::i32));
1483  }
1484 
1485  SDValue MergedValues[2] = {
1486  Result,
1487  Chain
1488  };
1489  return DAG.getMergeValues(MergedValues, DL);
1490  }
1491 
1492  // For most operations returning SDValue() will result in the node being
1493  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1494  // need to manually expand loads that may be legal in some address spaces and
1495  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1496  // compute shaders, since the data is sign extended when it is uploaded to the
1497  // buffer. However SEXT loads from other address spaces are not supported, so
1498  // we need to expand them here.
1499  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1500  EVT MemVT = LoadNode->getMemoryVT();
1501  assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1502  SDValue NewLoad = DAG.getExtLoad(
1503  ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
1504  LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
1505  SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1506  DAG.getValueType(MemVT));
1507 
1508  SDValue MergedValues[2] = { Res, Chain };
1509  return DAG.getMergeValues(MergedValues, DL);
1510  }
1511 
1512  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1513  return SDValue();
1514  }
1515 
1516  // DWORDADDR ISD marks already shifted address
1517  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1518  assert(VT == MVT::i32);
1519  Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
1520  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
1521  return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
1522  }
1523  return SDValue();
1524 }
1525 
1526 SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1527  SDValue Chain = Op.getOperand(0);
1528  SDValue Cond = Op.getOperand(1);
1529  SDValue Jump = Op.getOperand(2);
1530 
1531  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1532  Chain, Jump, Cond);
1533 }
1534 
1535 SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1536  SelectionDAG &DAG) const {
1537  MachineFunction &MF = DAG.getMachineFunction();
1538  const R600FrameLowering *TFL = Subtarget->getFrameLowering();
1539 
1540  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1541 
1542  unsigned FrameIndex = FIN->getIndex();
1543  unsigned IgnoredFrameReg;
1544  unsigned Offset =
1545  TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1546  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
1547  Op.getValueType());
1548 }
1549 
1551  bool IsVarArg) const {
1552  switch (CC) {
1555  case CallingConv::C:
1556  case CallingConv::Fast:
1557  case CallingConv::Cold:
1558  llvm_unreachable("kernels should not be handled here");
1566  return CC_R600;
1567  default:
1568  report_fatal_error("Unsupported calling convention.");
1569  }
1570 }
1571 
1572 /// XXX Only kernel functions are supported, so we can assume for now that
1573 /// every function is a kernel function, but in the future we should use
1574 /// separate calling conventions for kernel and non-kernel functions.
1576  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1577  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1578  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1580  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1581  *DAG.getContext());
1582  MachineFunction &MF = DAG.getMachineFunction();
1584 
1585  if (AMDGPU::isShader(CallConv)) {
1586  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
1587  } else {
1588  analyzeFormalArgumentsCompute(CCInfo, Ins);
1589  }
1590 
1591  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
1592  CCValAssign &VA = ArgLocs[i];
1593  const ISD::InputArg &In = Ins[i];
1594  EVT VT = In.VT;
1595  EVT MemVT = VA.getLocVT();
1596  if (!VT.isVector() && MemVT.isVector()) {
1597  // Get load source type if scalarized.
1598  MemVT = MemVT.getVectorElementType();
1599  }
1600 
1601  if (AMDGPU::isShader(CallConv)) {
1602  unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
1603  SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1604  InVals.push_back(Register);
1605  continue;
1606  }
1607 
1610 
1611  // i64 isn't a legal type, so the register type used ends up as i32, which
1612  // isn't expected here. It attempts to create this sextload, but it ends up
1613  // being invalid. Somehow this seems to work with i64 arguments, but breaks
1614  // for <1 x i64>.
1615 
1616  // The first 36 bytes of the input buffer contains information about
1617  // thread group and global sizes.
1619  if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1620  // FIXME: This should really check the extload type, but the handling of
1621  // extload vector parameters seems to be broken.
1622 
1623  // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1624  Ext = ISD::SEXTLOAD;
1625  }
1626 
1627  // Compute the offset from the value.
1628  // XXX - I think PartOffset should give you this, but it seems to give the
1629  // size of the register which isn't useful.
1630 
1631  unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
1632  unsigned PartOffset = VA.getLocMemOffset();
1633  unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);
1634 
1635  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
1636  SDValue Arg = DAG.getLoad(
1637  ISD::UNINDEXED, Ext, VT, DL, Chain,
1638  DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
1639  PtrInfo,
1640  MemVT, Alignment, MachineMemOperand::MONonTemporal |
1643 
1644  InVals.push_back(Arg);
1645  }
1646  return Chain;
1647 }
1648 
1650  EVT VT) const {
1651  if (!VT.isVector())
1652  return MVT::i32;
1654 }
1655 
1657  const SelectionDAG &DAG) const {
1658  // Local and Private addresses do not handle vectors. Limit to i32
1659  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
1660  return (MemVT.getSizeInBits() <= 32);
1661  }
1662  return true;
1663 }
1664 
1666  unsigned AddrSpace,
1667  unsigned Align,
1668  bool *IsFast) const {
1669  if (IsFast)
1670  *IsFast = false;
1671 
1672  if (!VT.isSimple() || VT == MVT::Other)
1673  return false;
1674 
1675  if (VT.bitsLT(MVT::i32))
1676  return false;
1677 
1678  // TODO: This is a rough estimate.
1679  if (IsFast)
1680  *IsFast = true;
1681 
1682  return VT.bitsGT(MVT::i32) && Align % 4 == 0;
1683 }
1684 
1686  SelectionDAG &DAG, SDValue VectorEntry,
1687  DenseMap<unsigned, unsigned> &RemapSwizzle) {
1688  assert(RemapSwizzle.empty());
1689 
1690  SDLoc DL(VectorEntry);
1691  EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1692 
1693  SDValue NewBldVec[4];
1694  for (unsigned i = 0; i < 4; i++)
1695  NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1696  DAG.getIntPtrConstant(i, DL));
1697 
1698  for (unsigned i = 0; i < 4; i++) {
1699  if (NewBldVec[i].isUndef())
1700  // We mask write here to teach later passes that the ith element of this
1701  // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1702  // break false dependencies and additionnaly make assembly easier to read.
1703  RemapSwizzle[i] = 7; // SEL_MASK_WRITE
1704  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1705  if (C->isZero()) {
1706  RemapSwizzle[i] = 4; // SEL_0
1707  NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1708  } else if (C->isExactlyValue(1.0)) {
1709  RemapSwizzle[i] = 5; // SEL_1
1710  NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1711  }
1712  }
1713 
1714  if (NewBldVec[i].isUndef())
1715  continue;
1716  for (unsigned j = 0; j < i; j++) {
1717  if (NewBldVec[i] == NewBldVec[j]) {
1718  NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1719  RemapSwizzle[i] = j;
1720  break;
1721  }
1722  }
1723  }
1724 
1725  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1726  NewBldVec);
1727 }
1728 
1730  DenseMap<unsigned, unsigned> &RemapSwizzle) {
1731  assert(RemapSwizzle.empty());
1732 
1733  SDLoc DL(VectorEntry);
1734  EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1735 
1736  SDValue NewBldVec[4];
1737  bool isUnmovable[4] = {false, false, false, false};
1738  for (unsigned i = 0; i < 4; i++)
1739  NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1740  DAG.getIntPtrConstant(i, DL));
1741 
1742  for (unsigned i = 0; i < 4; i++) {
1743  RemapSwizzle[i] = i;
1744  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1745  unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1746  ->getZExtValue();
1747  if (i == Idx)
1748  isUnmovable[Idx] = true;
1749  }
1750  }
1751 
1752  for (unsigned i = 0; i < 4; i++) {
1753  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1754  unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1755  ->getZExtValue();
1756  if (isUnmovable[Idx])
1757  continue;
1758  // Swap i and Idx
1759  std::swap(NewBldVec[Idx], NewBldVec[i]);
1760  std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1761  break;
1762  }
1763  }
1764 
1765  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1766  NewBldVec);
1767 }
1768 
1769 SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
1770  SelectionDAG &DAG,
1771  const SDLoc &DL) const {
1772  // Old -> New swizzle values
1773  DenseMap<unsigned, unsigned> SwizzleRemap;
1774 
1775  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1776  for (unsigned i = 0; i < 4; i++) {
1777  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1778  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1779  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1780  }
1781 
1782  SwizzleRemap.clear();
1783  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1784  for (unsigned i = 0; i < 4; i++) {
1785  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1786  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1787  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1788  }
1789 
1790  return BuildVector;
1791 }
1792 
1793 SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
1794  SelectionDAG &DAG) const {
1795  SDLoc DL(LoadNode);
1796  EVT VT = LoadNode->getValueType(0);
1797  SDValue Chain = LoadNode->getChain();
1798  SDValue Ptr = LoadNode->getBasePtr();
1799  assert (isa<ConstantSDNode>(Ptr));
1800 
1801  //TODO: Support smaller loads
1802  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
1803  return SDValue();
1804 
1805  if (LoadNode->getAlignment() < 4)
1806  return SDValue();
1807 
1808  int ConstantBlock = ConstantAddressBlock(Block);
1809 
1810  SDValue Slots[4];
1811  for (unsigned i = 0; i < 4; i++) {
1812  // We want Const position encoded with the following formula :
1813  // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1814  // const_index is Ptr computed by llvm using an alignment of 16.
1815  // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1816  // then div by 4 at the ISel step
1817  SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1818  DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
1819  Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1820  }
1821  EVT NewVT = MVT::v4i32;
1822  unsigned NumElements = 4;
1823  if (VT.isVector()) {
1824  NewVT = VT;
1825  NumElements = VT.getVectorNumElements();
1826  }
1827  SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
1828  if (!VT.isVector()) {
1829  Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1830  DAG.getConstant(0, DL, MVT::i32));
1831  }
1832  SDValue MergedValues[2] = {
1833  Result,
1834  Chain
1835  };
1836  return DAG.getMergeValues(MergedValues, DL);
1837 }
1838 
1839 //===----------------------------------------------------------------------===//
1840 // Custom DAG Optimizations
1841 //===----------------------------------------------------------------------===//
1842 
1844  DAGCombinerInfo &DCI) const {
1845  SelectionDAG &DAG = DCI.DAG;
1846  SDLoc DL(N);
1847 
1848  switch (N->getOpcode()) {
1849  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1850  case ISD::FP_ROUND: {
1851  SDValue Arg = N->getOperand(0);
1852  if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1853  return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1854  Arg.getOperand(0));
1855  }
1856  break;
1857  }
1858 
1859  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1860  // (i32 select_cc f32, f32, -1, 0 cc)
1861  //
1862  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1863  // this to one of the SET*_DX10 instructions.
1864  case ISD::FP_TO_SINT: {
1865  SDValue FNeg = N->getOperand(0);
1866  if (FNeg.getOpcode() != ISD::FNEG) {
1867  return SDValue();
1868  }
1869  SDValue SelectCC = FNeg.getOperand(0);
1870  if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1871  SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1872  SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1873  !isHWTrueValue(SelectCC.getOperand(2)) ||
1874  !isHWFalseValue(SelectCC.getOperand(3))) {
1875  return SDValue();
1876  }
1877 
1878  return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1879  SelectCC.getOperand(0), // LHS
1880  SelectCC.getOperand(1), // RHS
1881  DAG.getConstant(-1, DL, MVT::i32), // True
1882  DAG.getConstant(0, DL, MVT::i32), // False
1883  SelectCC.getOperand(4)); // CC
1884 
1885  break;
1886  }
1887 
1888  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1889  // => build_vector elt0, ... , NewEltIdx, ... , eltN
1890  case ISD::INSERT_VECTOR_ELT: {
1891  SDValue InVec = N->getOperand(0);
1892  SDValue InVal = N->getOperand(1);
1893  SDValue EltNo = N->getOperand(2);
1894 
1895  // If the inserted element is an UNDEF, just use the input vector.
1896  if (InVal.isUndef())
1897  return InVec;
1898 
1899  EVT VT = InVec.getValueType();
1900 
1901  // If we can't generate a legal BUILD_VECTOR, exit
1903  return SDValue();
1904 
1905  // Check that we know which element is being inserted
1906  if (!isa<ConstantSDNode>(EltNo))
1907  return SDValue();
1908  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1909 
1910  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1911  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1912  // vector elements.
1914  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1915  Ops.append(InVec.getNode()->op_begin(),
1916  InVec.getNode()->op_end());
1917  } else if (InVec.isUndef()) {
1918  unsigned NElts = VT.getVectorNumElements();
1919  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1920  } else {
1921  return SDValue();
1922  }
1923 
1924  // Insert the element
1925  if (Elt < Ops.size()) {
1926  // All the operands of BUILD_VECTOR must have the same type;
1927  // we enforce that here.
1928  EVT OpVT = Ops[0].getValueType();
1929  if (InVal.getValueType() != OpVT)
1930  InVal = OpVT.bitsGT(InVal.getValueType()) ?
1931  DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1932  DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1933  Ops[Elt] = InVal;
1934  }
1935 
1936  // Return the new vector
1937  return DAG.getBuildVector(VT, DL, Ops);
1938  }
1939 
1940  // Extract_vec (Build_vector) generated by custom lowering
1941  // also needs to be customly combined
1942  case ISD::EXTRACT_VECTOR_ELT: {
1943  SDValue Arg = N->getOperand(0);
1944  if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1945  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1946  unsigned Element = Const->getZExtValue();
1947  return Arg->getOperand(Element);
1948  }
1949  }
1950  if (Arg.getOpcode() == ISD::BITCAST &&
1951  Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1954  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1955  unsigned Element = Const->getZExtValue();
1956  return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1957  Arg->getOperand(0).getOperand(Element));
1958  }
1959  }
1960  break;
1961  }
1962 
1963  case ISD::SELECT_CC: {
1964  // Try common optimizations
1966  return Ret;
1967 
1968  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1969  // selectcc x, y, a, b, inv(cc)
1970  //
1971  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1972  // selectcc x, y, a, b, cc
1973  SDValue LHS = N->getOperand(0);
1974  if (LHS.getOpcode() != ISD::SELECT_CC) {
1975  return SDValue();
1976  }
1977 
1978  SDValue RHS = N->getOperand(1);
1979  SDValue True = N->getOperand(2);
1980  SDValue False = N->getOperand(3);
1981  ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1982 
1983  if (LHS.getOperand(2).getNode() != True.getNode() ||
1984  LHS.getOperand(3).getNode() != False.getNode() ||
1985  RHS.getNode() != False.getNode()) {
1986  return SDValue();
1987  }
1988 
1989  switch (NCC) {
1990  default: return SDValue();
1991  case ISD::SETNE: return LHS;
1992  case ISD::SETEQ: {
1993  ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1994  LHSCC = ISD::getSetCCInverse(LHSCC,
1995  LHS.getOperand(0).getValueType().isInteger());
1996  if (DCI.isBeforeLegalizeOps() ||
1998  return DAG.getSelectCC(DL,
1999  LHS.getOperand(0),
2000  LHS.getOperand(1),
2001  LHS.getOperand(2),
2002  LHS.getOperand(3),
2003  LHSCC);
2004  break;
2005  }
2006  }
2007  return SDValue();
2008  }
2009 
2010  case AMDGPUISD::R600_EXPORT: {
2011  SDValue Arg = N->getOperand(1);
2012  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2013  break;
2014 
2015  SDValue NewArgs[8] = {
2016  N->getOperand(0), // Chain
2017  SDValue(),
2018  N->getOperand(2), // ArrayBase
2019  N->getOperand(3), // Type
2020  N->getOperand(4), // SWZ_X
2021  N->getOperand(5), // SWZ_Y
2022  N->getOperand(6), // SWZ_Z
2023  N->getOperand(7) // SWZ_W
2024  };
2025  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
2026  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
2027  }
2028  case AMDGPUISD::TEXTURE_FETCH: {
2029  SDValue Arg = N->getOperand(1);
2030  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2031  break;
2032 
2033  SDValue NewArgs[19] = {
2034  N->getOperand(0),
2035  N->getOperand(1),
2036  N->getOperand(2),
2037  N->getOperand(3),
2038  N->getOperand(4),
2039  N->getOperand(5),
2040  N->getOperand(6),
2041  N->getOperand(7),
2042  N->getOperand(8),
2043  N->getOperand(9),
2044  N->getOperand(10),
2045  N->getOperand(11),
2046  N->getOperand(12),
2047  N->getOperand(13),
2048  N->getOperand(14),
2049  N->getOperand(15),
2050  N->getOperand(16),
2051  N->getOperand(17),
2052  N->getOperand(18),
2053  };
2054  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2055  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
2056  }
2057 
2058  case ISD::LOAD: {
2059  LoadSDNode *LoadNode = cast<LoadSDNode>(N);
2060  SDValue Ptr = LoadNode->getBasePtr();
2061  if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
2062  isa<ConstantSDNode>(Ptr))
2063  return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
2064  break;
2065  }
2066 
2067  default: break;
2068  }
2069 
2071 }
2072 
2073 bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
2074  SDValue &Src, SDValue &Neg, SDValue &Abs,
2075  SDValue &Sel, SDValue &Imm,
2076  SelectionDAG &DAG) const {
2077  const R600InstrInfo *TII = Subtarget->getInstrInfo();
2078  if (!Src.isMachineOpcode())
2079  return false;
2080 
2081  switch (Src.getMachineOpcode()) {
2082  case R600::FNEG_R600:
2083  if (!Neg.getNode())
2084  return false;
2085  Src = Src.getOperand(0);
2086  Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2087  return true;
2088  case R600::FABS_R600:
2089  if (!Abs.getNode())
2090  return false;
2091  Src = Src.getOperand(0);
2092  Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2093  return true;
2094  case R600::CONST_COPY: {
2095  unsigned Opcode = ParentNode->getMachineOpcode();
2096  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2097 
2098  if (!Sel.getNode())
2099  return false;
2100 
2101  SDValue CstOffset = Src.getOperand(0);
2102  if (ParentNode->getValueType(0).isVector())
2103  return false;
2104 
2105  // Gather constants values
2106  int SrcIndices[] = {
2107  TII->getOperandIdx(Opcode, R600::OpName::src0),
2108  TII->getOperandIdx(Opcode, R600::OpName::src1),
2109  TII->getOperandIdx(Opcode, R600::OpName::src2),
2110  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2111  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2112  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2113  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2114  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2115  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2116  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2117  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2118  };
2119  std::vector<unsigned> Consts;
2120  for (int OtherSrcIdx : SrcIndices) {
2121  int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2122  if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2123  continue;
2124  if (HasDst) {
2125  OtherSrcIdx--;
2126  OtherSelIdx--;
2127  }
2128  if (RegisterSDNode *Reg =
2129  dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2130  if (Reg->getReg() == R600::ALU_CONST) {
2131  ConstantSDNode *Cst
2132  = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
2133  Consts.push_back(Cst->getZExtValue());
2134  }
2135  }
2136  }
2137 
2138  ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2139  Consts.push_back(Cst->getZExtValue());
2140  if (!TII->fitsConstReadLimitations(Consts)) {
2141  return false;
2142  }
2143 
2144  Sel = CstOffset;
2145  Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2146  return true;
2147  }
2148  case R600::MOV_IMM_GLOBAL_ADDR:
2149  // Check if the Imm slot is used. Taken from below.
2150  if (cast<ConstantSDNode>(Imm)->getZExtValue())
2151  return false;
2152  Imm = Src.getOperand(0);
2153  Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2154  return true;
2155  case R600::MOV_IMM_I32:
2156  case R600::MOV_IMM_F32: {
2157  unsigned ImmReg = R600::ALU_LITERAL_X;
2158  uint64_t ImmValue = 0;
2159 
2160  if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2162  float FloatValue = FPC->getValueAPF().convertToFloat();
2163  if (FloatValue == 0.0) {
2164  ImmReg = R600::ZERO;
2165  } else if (FloatValue == 0.5) {
2166  ImmReg = R600::HALF;
2167  } else if (FloatValue == 1.0) {
2168  ImmReg = R600::ONE;
2169  } else {
2170  ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2171  }
2172  } else {
2174  uint64_t Value = C->getZExtValue();
2175  if (Value == 0) {
2176  ImmReg = R600::ZERO;
2177  } else if (Value == 1) {
2178  ImmReg = R600::ONE_INT;
2179  } else {
2180  ImmValue = Value;
2181  }
2182  }
2183 
2184  // Check that we aren't already using an immediate.
2185  // XXX: It's possible for an instruction to have more than one
2186  // immediate operand, but this is not supported yet.
2187  if (ImmReg == R600::ALU_LITERAL_X) {
2188  if (!Imm.getNode())
2189  return false;
2191  assert(C);
2192  if (C->getZExtValue())
2193  return false;
2194  Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2195  }
2196  Src = DAG.getRegister(ImmReg, MVT::i32);
2197  return true;
2198  }
2199  default:
2200  return false;
2201  }
2202 }
2203 
/// Fold the instructions after selecting them.
///
/// Walks the operands of a just-selected machine node and, via FoldOperand,
/// absorbs FNEG/FABS/constant-copy/immediate-producing source nodes into the
/// instruction's own modifier/selector/literal operand slots.
///
/// \param Node the machine node produced by instruction selection.
/// \returns a new machine node with folded operands, or \p Node unchanged
///          when nothing could be folded (or it is not a machine opcode).
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII = Subtarget->getInstrInfo();
  if (!Node->isMachineOpcode())
    return Node;

  unsigned Opcode = Node->getMachineOpcode();
  // Placeholder passed for modifier slots the instruction doesn't have;
  // FoldOperand refuses folds that would need a null slot.
  SDValue FakeOp;

  // Mutable copy of the operand list; references below alias into it so a
  // successful fold can rebuild the node from the edited list.
  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());

  if (Opcode == R600::DOT_4) {
    // DOT_4 carries eight lane sources (src0/src1 x XYZW), each with its own
    // neg/abs modifier operand.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // The `- 1` re-bases MachineInstr operand indices onto the SDNode
      // operand list — presumably because the former counts the def and the
      // latter does not; TODO confirm against R600InstrInfo::getOperandIdx.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // DOT_4 has no literal slot, so the Imm argument is the placeholder.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == R600::REG_SEQUENCE) {
    // REG_SEQUENCE alternates (value, subreg-index) pairs starting at
    // operand 1; only the value operands are candidates, and no modifier
    // slots exist.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else {
    // Generic ALU instruction: up to three sources with neg modifiers;
    // src2 has no abs modifier, hence the -1 sentinel below.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0),
      TII->getOperandIdx(Opcode, R600::OpName::src1),
      TII->getOperandIdx(Opcode, R600::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
      TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      // NOTE(review): ImmIdx is used unguarded below; this appears to rely on
      // hasInstrModifiers implying a literal operand exists — confirm.
      int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
uint64_t CallInst * C
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:571
X = FP_ROUND(Y, TRUNC) - Rounding &#39;Y&#39; from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:538
static SDValue CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
const MachineInstrBuilder & add(const MachineOperand &MO) const
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
EVT getValueType() const
Return the ValueType of the referenced return value.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
const SDValue & getOffset() const
bool isUndef() const
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:35
const GlobalValue * getGlobal() const
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1563
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
AMDGPU specific subclass of TargetSubtarget.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:650
This class represents lattice values for constants.
Definition: AllocatorList.h:24
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types...
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Interface definition for R600InstrInfo.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:260
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
bool canMergeStoresTo(unsigned AS, EVT MemVT, const SelectionDAG &DAG) const override
Returns if it&#39;s reasonable to merge stores to MemVT size.
void addFlag(MachineInstr &MI, unsigned Operand, unsigned Flag) const
Add one of the MO_FLAG* flags to the specified Operand.
const SDValue & getBasePtr() const
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:223
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
const SDValue & getValue() const
SDVTList getVTList() const
unsigned Reg
static PointerType * getInt32PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:228
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:630
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the &#39;usesCustomInserter&#39; fla...
float convertToFloat() const
Definition: APFloat.h:1098
const SDValue & getChain() const
Function Alias Analysis Results
Address space for private memory.
Definition: AMDGPU.h:261
unsigned getAlignment() const
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:306
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:141
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:435
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:210
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
Definition: ISDOpcodes.h:456
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:192
Pointer to the start of the shader&#39;s constant data.
Address space for constant memory (VTX2)
Definition: AMDGPU.h:259
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:198
const HexagonInstrInfo * TII
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
#define MO_FLAG_ABS
Definition: R600Defines.h:19
Shift and rotation operations.
Definition: ISDOpcodes.h:410
const ConstantFP * getFPImm() const
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:412
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:137
int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
static Optional< unsigned > getOpcode(ArrayRef< VPValue *> Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:197
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
op_iterator op_end() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn&#39;t supported on the target and indicate what to d...
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:304
The memory access is dereferenceable (i.e., doesn&#39;t trap).
int getSelIdx(unsigned Opcode, unsigned SrcIdx) const
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
void setImmOperand(MachineInstr &MI, unsigned Op, int64_t Imm) const
Helper function for setting instruction flag values.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:460
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:401
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Interface to describe a layout of a stack frame on an AMDGPU target.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
This file implements a class to represent arbitrary precision integral constant values and operations...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool hasInstrModifiers(unsigned Opcode) const
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:298
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use...
Definition: CallingConv.h:221
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:398
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
#define MO_FLAG_NEG
Definition: R600Defines.h:18
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:852
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:959
op_iterator op_begin() const
MachineInstrBuilder buildDefaultInstruction(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opcode, unsigned DstReg, unsigned Src0Reg, unsigned Src1Reg=0) const
buildDefaultInstruction - This function returns a MachineInstr with all the instruction modifiers ini...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:576
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:189
Class to represent pointers.
Definition: DerivedTypes.h:467
This class is used to represent ISD::STORE nodes.
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:524
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:327
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
const Value * getValue() const
Return the base address of the memory access.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:176
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:229
const R600FrameLowering * getFrameLowering() const override
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:610
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool hasFP32Denormals() const
const SDValue & getBasePtr() const
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:235
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:166
unsigned const MachineRegisterInfo * MRI
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1401
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Machine Value Type.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
Simple binary floating point operators.
Definition: ISDOpcodes.h:283
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
bool isMachineOpcode() const
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:273
static int ConstantAddressBlock(unsigned AddressSpace)
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
const SDValue & getOperand(unsigned Num) const
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:934
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:332
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:247
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:232
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
const SDValue & getOffset() const
This file declares a class to represent arbitrary precision floating point values and provide a varie...
Address space for indirect addressible parameter memory (VTX1)
Definition: AMDGPU.h:268
Address space for local memory.
Definition: AMDGPU.h:260
const R600InstrInfo * getInstrInfo() const override
unsigned getMachineOpcode() const
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:216
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
int getOperandIdx(const MachineInstr &MI, unsigned Op) const
Get the index of Op in the MachineInstr.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
XXX Only kernel functions are supported, so we can assume for now that every function is a kernel fun...
The memory access is non-temporal.
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the &#39;usesCustomInserter&#39; fla...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y)...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:416
Extended Value Type.
Definition: ValueTypes.h:34
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:256
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1415
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:195
size_t size() const
Definition: SmallVector.h:53
This class contains a discriminated union of information about pointers in memory operands...
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool fitsConstReadLimitations(const std::vector< MachineInstr *> &) const
An instruction group can only access 2 channel pair (either [XY] or [ZW]) from KCache bank on R700+...
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value...
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
unsigned getAddressSpace() const
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
const APFloat & getValueAPF() const
Definition: Constants.h:303
const R600RegisterInfo * getRegisterInfo() const override
static bool isUndef(ArrayRef< int > Mask)
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, const TargetRegisterClass *RC, unsigned Reg, EVT VT) const
CCState - This class holds information needed while lowering arguments and return values...
SDValue scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:339
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:265
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:222
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:734
CCValAssign - Represent assignment of one arg/retval to a location.
AddressSpace
Definition: NVPTXBaseInfo.h:22
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:96
int getLDSNoRetOp(uint16_t Opcode)
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:644
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
An SDNode that represents everything that will be needed to construct a MachineInstr.
Promote Memory to Register
Definition: Mem2Reg.cpp:110
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
Return the operation corresponding to !(X op Y), where &#39;op&#39; is a valid SetCC operation.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
int64_t getImm() const
static mvt_range integer_valuetypes()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
EVT getMemoryVT() const
Return the type of the in-memory value.
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool isShader(CallingConv::ID cc)
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:420
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:241
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:471
bool use_empty(unsigned RegNo) const
use_empty - Return true if there are no instructions using the specified register.
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors...
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:312
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:394
bool isLDSRetInstr(unsigned Opcode) const
bool hasBCNT(unsigned Size) const
amdgpu Simplify well known AMD library false Value Value * Arg
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
unsigned getOrigArgIndex() const
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:387
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:705
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getLocMemOffset() const
R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI)
R600 DAG Lowering interface definition.
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:486
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:208
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:614
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
Flags getFlags() const
Return the raw flags of the source value,.
The memory access always returns the same value (or traps).
iterator end()
Definition: DenseMap.h:109
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
unsigned getOpcode() const
SDValue getValue(unsigned R) const
LLVM_NODISCARD bool empty() const
Definition: DenseMap.h:123
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getCondCode(ISD::CondCode Cond)
const MachinePointerInfo & getPointerInfo() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LLVM Value Representation.
Definition: Value.h:73
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:302
SDValue getRegister(unsigned Reg, EVT VT)
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:962
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *IsFast) const override
Determine if the target supports unaligned memory accesses.
SDValue getValueType(EVT)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
static bool isEOP(MachineBasicBlock::iterator I)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
IRTranslator LLVM IR MI
const APFloat & getValueAPF() const
#define MO_FLAG_MASK
Definition: R600Defines.h:20
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations...
Definition: ISDOpcodes.h:306
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:443
APInt bitcastToAPInt() const
Definition: APFloat.h:1094
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:198
Conversion operators.
Definition: ISDOpcodes.h:465
const SDValue & getOperand(unsigned i) const
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:789
unsigned getLocReg() const
uint64_t getZExtValue() const
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:474
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:126
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override
Return the ValueType of the result of SETCC operations.
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:584
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:785
MVT getVectorIdxTy(const DataLayout &) const override
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:201
LLVMContext * getContext() const
Definition: SelectionDAG.h:407
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
MachineInstr * buildMovImm(MachineBasicBlock &BB, MachineBasicBlock::iterator I, unsigned DstReg, uint64_t Imm) const
#define MO_FLAG_PUSH
Definition: R600Defines.h:21
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:622
This class is used to represent ISD::LOAD nodes.
unsigned getStackWidth(const MachineFunction &MF) const
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.