LLVM  8.0.1
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains a printer that converts from our internal representation
11 // of machine-dependent LLVM code to NVPTX assembly language.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "NVPTXAsmPrinter.h"
20 #include "NVPTX.h"
21 #include "NVPTXMCExpr.h"
23 #include "NVPTXRegisterInfo.h"
24 #include "NVPTXSubtarget.h"
25 #include "NVPTXTargetMachine.h"
26 #include "NVPTXUtilities.h"
27 #include "cl_common_defines.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/DenseSet.h"
32 #include "llvm/ADT/SmallString.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/StringExtras.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/Triple.h"
37 #include "llvm/ADT/Twine.h"
39 #include "llvm/CodeGen/Analysis.h"
51 #include "llvm/IR/Attributes.h"
52 #include "llvm/IR/BasicBlock.h"
53 #include "llvm/IR/Constant.h"
54 #include "llvm/IR/Constants.h"
55 #include "llvm/IR/DataLayout.h"
56 #include "llvm/IR/DebugInfo.h"
58 #include "llvm/IR/DebugLoc.h"
59 #include "llvm/IR/DerivedTypes.h"
60 #include "llvm/IR/Function.h"
61 #include "llvm/IR/GlobalValue.h"
62 #include "llvm/IR/GlobalVariable.h"
63 #include "llvm/IR/Instruction.h"
64 #include "llvm/IR/LLVMContext.h"
65 #include "llvm/IR/Module.h"
66 #include "llvm/IR/Operator.h"
67 #include "llvm/IR/Type.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/MC/MCExpr.h"
70 #include "llvm/MC/MCInst.h"
71 #include "llvm/MC/MCInstrDesc.h"
72 #include "llvm/MC/MCStreamer.h"
73 #include "llvm/MC/MCSymbol.h"
74 #include "llvm/Support/Casting.h"
78 #include "llvm/Support/Path.h"
84 #include <cassert>
85 #include <cstdint>
86 #include <cstring>
87 #include <new>
88 #include <string>
89 #include <utility>
90 #include <vector>
91 
92 using namespace llvm;
93 
// String literal "__local_depot". Its use sites are not visible in this part
// of the file; presumably it is the name prefix of a per-function local
// storage symbol (TODO confirm against the code that references it).
94 #define DEPOTNAME "__local_depot"
95 
96 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
97 /// depends.
// NOTE(review): the parameter list (listing lines 99-100) is missing from this
// copy of the file. Judging by the body, the function receives a value V and a
// set Globals that collects the result.
//
// If V is itself a GlobalVariable it is inserted into Globals and the walk
// stops there. Otherwise, if V is a User, each of its operands is searched
// recursively. Values that are neither contribute nothing.
98 static void
101  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
102  Globals.insert(GV);
103  else {
104  if (const User *U = dyn_cast<User>(V)) {
105  for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
106  DiscoverDependentGlobals(U->getOperand(i), Globals);
107  }
108  }
109  }
110 }
111 
112 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
113 /// instances to be emitted, but only after any dependents have been added
114 /// first.
// NOTE(review): this copy of the file is missing the parameter list (listing
// lines 116-119), the declaration of Others (line 129) and the first line of
// the second for-loop header (line 133). From the uses below, the parameters
// are GV, Order (output list), Visited (finished set) and Visiting (set of
// globals currently on the recursion stack).
//
// A global that is re-entered while still in Visiting is reported as a fatal
// circular-dependency error.
115 static void
120  // Have we already visited this one?
121  if (Visited.count(GV))
122  return;
123 
124  // Do we have a circular dependency?
125  if (!Visiting.insert(GV).second)
126  report_fatal_error("Circular dependency found in global variable set");
127 
128  // Make sure we visit all dependents first
// Collect every global referenced by GV's operands into Others.
130  for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
131  DiscoverDependentGlobals(GV->getOperand(i), Others);
132 
// Recurse into each collected global so it is appended to Order before GV.
134  E = Others.end();
135  I != E; ++I)
136  VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
137 
138  // Now we can visit ourself
139  Order.push_back(GV);
140  Visited.insert(GV);
141  Visiting.erase(GV);
142 }
143 
144 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
145  MCInst Inst;
146  lowerToMCInst(MI, Inst);
147  EmitToStreamer(*OutStreamer, Inst);
148 }
149 
150 // Handle symbol backtracking for targets that do not support image handles
151 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
152  unsigned OpNo, MCOperand &MCOp) {
153  const MachineOperand &MO = MI->getOperand(OpNo);
154  const MCInstrDesc &MCID = MI->getDesc();
155 
156  if (MCID.TSFlags & NVPTXII::IsTexFlag) {
157  // This is a texture fetch, so operand 4 is a texref and operand 5 is
158  // a samplerref
159  if (OpNo == 4 && MO.isImm()) {
160  lowerImageHandleSymbol(MO.getImm(), MCOp);
161  return true;
162  }
163  if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
164  lowerImageHandleSymbol(MO.getImm(), MCOp);
165  return true;
166  }
167 
168  return false;
169  } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
170  unsigned VecSize =
171  1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
172 
173  // For a surface load of vector size N, the Nth operand will be the surfref
174  if (OpNo == VecSize && MO.isImm()) {
175  lowerImageHandleSymbol(MO.getImm(), MCOp);
176  return true;
177  }
178 
179  return false;
180  } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
181  // This is a surface store, so operand 0 is a surfref
182  if (OpNo == 0 && MO.isImm()) {
183  lowerImageHandleSymbol(MO.getImm(), MCOp);
184  return true;
185  }
186 
187  return false;
188  } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
189  // This is a query, so operand 1 is a surfref/texref
190  if (OpNo == 1 && MO.isImm()) {
191  lowerImageHandleSymbol(MO.getImm(), MCOp);
192  return true;
193  }
194 
195  return false;
196  }
197 
198  return false;
199 }
200 
// Turn the image-handle index Index into a symbol reference stored in MCOp.
// The symbol name comes from MFI->getImageHandleSymbol(Index), is copied into
// the target machine's managed string pool, and is then looked up (or
// created) as an MCSymbol in OutContext.
// NOTE(review): listing line 205, which declares MFI, is missing from this
// copy of the file.
201 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
202  // Ewwww
// The const_cast strips constness from the target machine; presumably
// getManagedStrPool() needs a non-const object (TODO confirm).
203  LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget());
204  NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
206  const char *Sym = MFI->getImageHandleSymbol(Index);
207  std::string *SymNamePtr =
208  nvTM.getManagedStrPool()->getManagedString(Sym);
209  MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(StringRef(*SymNamePtr)));
210 }
211 
// Build OutMI from MI: copy the opcode, then lower the operands one by one.
// CALL_PROTOTYPE is special-cased: only its operand 0 is added, as a symbol
// reference, and the function returns early.
// NOTE(review): listing line 218 (the argument of GetSymbolRef in the
// CALL_PROTOTYPE branch) is missing from this copy of the file.
212 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
213  OutMI.setOpcode(MI->getOpcode());
214  // Special: Do not mangle symbol operand of CALL_PROTOTYPE
215  if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
216  const MachineOperand &MO = MI->getOperand(0);
217  OutMI.addOperand(GetSymbolRef(
219  return;
220  }
221 
222  const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
223  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
224  const MachineOperand &MO = MI->getOperand(i);
225 
226  MCOperand MCOp;
// Without image-handle support on the subtarget, texture/sampler/surface
// operands are first tried as symbol references.
227  if (!STI.hasImageHandles()) {
228  if (lowerImageHandleOperand(MI, i, MCOp)) {
229  OutMI.addOperand(MCOp);
230  continue;
231  }
232  }
233 
// Generic path for all remaining operands.
234  if (lowerOperand(MO, MCOp))
235  OutMI.addOperand(MCOp);
236  }
237 }
238 
// Translate machine operand MO into MCOp, switching on the operand type.
// Every path that returns does so with true; an operand type without a case
// reaches llvm_unreachable.
// NOTE(review): this copy of the file has lost several lines of the switch
// (listing lines 243, 246, 249-250, 253-254, 256, 259, 267, 271 and 275),
// including all outer `case` labels, so the block is not compilable as shown.
239 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
240  MCOperand &MCOp) {
241  switch (MO.getType()) {
242  default: llvm_unreachable("unknown operand type");
// Register: stored using the encoding produced by encodeVirtualRegister().
244  MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
245  break;
// Integer immediate: passed through unchanged.
247  MCOp = MCOperand::createImm(MO.getImm());
248  break;
// Built from the symbol of the referenced machine basic block.
251  MO.getMBB()->getSymbol(), OutContext));
252  break;
255  break;
// Global value: reference to the global's symbol.
257  MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
258  break;
// Floating-point immediate: the expression created depends on the IR type of
// the constant (half, float or double); any other FP type is a fatal error.
260  const ConstantFP *Cnt = MO.getFPImm();
261  const APFloat &Val = Cnt->getValueAPF();
262 
263  switch (Cnt->getType()->getTypeID()) {
264  default: report_fatal_error("Unsupported FP type"); break;
265  case Type::HalfTyID:
266  MCOp = MCOperand::createExpr(
268  break;
269  case Type::FloatTyID:
270  MCOp = MCOperand::createExpr(
272  break;
273  case Type::DoubleTyID:
274  MCOp = MCOperand::createExpr(
276  break;
277  }
278  break;
279  }
280  }
281  return true;
282 }
283 
// Pack register Reg into a single unsigned value.
// First branch: the upper 4 bits hold a register-class code (1..8, see the
// chain below) and the lower 28 bits hold the number stored for Reg in
// VRegMapping; an unlisted register class is a fatal error.
// Else branch: Reg masked to 28 bits, i.e. class code 0.
// NOTE(review): listing line 285, the `if` that opens the first branch (closed
// by `} else {` further down), is missing from this copy of the file.
284 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
286  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
287 
288  DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
289  unsigned RegNum = RegMap[Reg];
290 
291  // Encode the register class in the upper 4 bits
292  // Must be kept in sync with NVPTXInstPrinter::printRegName
293  unsigned Ret = 0;
294  if (RC == &NVPTX::Int1RegsRegClass) {
295  Ret = (1 << 28);
296  } else if (RC == &NVPTX::Int16RegsRegClass) {
297  Ret = (2 << 28);
298  } else if (RC == &NVPTX::Int32RegsRegClass) {
299  Ret = (3 << 28);
300  } else if (RC == &NVPTX::Int64RegsRegClass) {
301  Ret = (4 << 28);
302  } else if (RC == &NVPTX::Float32RegsRegClass) {
303  Ret = (5 << 28);
304  } else if (RC == &NVPTX::Float64RegsRegClass) {
305  Ret = (6 << 28);
306  } else if (RC == &NVPTX::Float16RegsRegClass) {
307  Ret = (7 << 28);
308  } else if (RC == &NVPTX::Float16x2RegsRegClass) {
309  Ret = (8 << 28);
310  } else {
311  report_fatal_error("Bad register class");
312  }
313 
314  // Insert the vreg number
315  Ret |= (RegNum & 0x0FFFFFFF);
316  return Ret;
317  } else {
318  // Some special-use registers are actually physical registers.
319  // Encode this as the register class ID of 0 and the real register ID.
320  return Reg & 0x0FFFFFFF;
321  }
322 }
323 
// NOTE(review): the signature (listing line 324) and the first line of the
// assignment to Expr (line 326) are missing from this copy of the file. The
// callers in this file use a GetSymbolRef(symbol) helper that yields an
// MCOperand, which is presumably this function (TODO confirm).
// The visible part builds an MCExpr in OutContext and wraps it in an
// expression operand.
325  const MCExpr *Expr;
327  OutContext);
328  return MCOperand::createExpr(Expr);
329 }
330 
331 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
332  const DataLayout &DL = getDataLayout();
333  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
334  const TargetLowering *TLI = STI.getTargetLowering();
335 
336  Type *Ty = F->getReturnType();
337 
338  bool isABI = (STI.getSmVersion() >= 20);
339 
340  if (Ty->getTypeID() == Type::VoidTyID)
341  return;
342 
343  O << " (";
344 
345  if (isABI) {
346  if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
347  unsigned size = 0;
348  if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
349  size = ITy->getBitWidth();
350  } else {
351  assert(Ty->isFloatingPointTy() && "Floating point type expected here");
352  size = Ty->getPrimitiveSizeInBits();
353  }
354  // PTX ABI requires all scalar return values to be at least 32
355  // bits in size. fp16 normally uses .b16 as its storage type in
356  // PTX, so its size must be adjusted here, too.
357  if (size < 32)
358  size = 32;
359 
360  O << ".param .b" << size << " func_retval0";
361  } else if (isa<PointerType>(Ty)) {
362  O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
363  << " func_retval0";
364  } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
365  unsigned totalsz = DL.getTypeAllocSize(Ty);
366  unsigned retAlignment = 0;
367  if (!getAlign(*F, 0, retAlignment))
368  retAlignment = DL.getABITypeAlignment(Ty);
369  O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
370  << "]";
371  } else
372  llvm_unreachable("Unknown return type");
373  } else {
374  SmallVector<EVT, 16> vtparts;
375  ComputeValueVTs(*TLI, DL, Ty, vtparts);
376  unsigned idx = 0;
377  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
378  unsigned elems = 1;
379  EVT elemtype = vtparts[i];
380  if (vtparts[i].isVector()) {
381  elems = vtparts[i].getVectorNumElements();
382  elemtype = vtparts[i].getVectorElementType();
383  }
384 
385  for (unsigned j = 0, je = elems; j != je; ++j) {
386  unsigned sz = elemtype.getSizeInBits();
387  if (elemtype.isInteger() && (sz < 32))
388  sz = 32;
389  O << ".reg .b" << sz << " func_retval" << idx;
390  if (j < je - 1)
391  O << ", ";
392  ++idx;
393  }
394  if (i < e - 1)
395  O << ", ";
396  }
397  }
398  O << ") ";
399 }
400 
401 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
402  raw_ostream &O) {
403  const Function &F = MF.getFunction();
404  printReturnValStr(&F, O);
405 }
406 
407 // Return true if MBB is the header of a loop marked with
408 // llvm.loop.unroll.disable.
409 // TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
410 bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
411  const MachineBasicBlock &MBB) const {
412  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
413  // We insert .pragma "nounroll" only to the loop header.
414  if (!LI.isLoopHeader(&MBB))
415  return false;
416 
417  // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
418  // we iterate through each back edge of the loop with header MBB, and check
419  // whether its metadata contains llvm.loop.unroll.disable.
420  for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
421  const MachineBasicBlock *PMBB = *I;
422  if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
423  // Edges from other loops to MBB are not back edges.
424  continue;
425  }
426  if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
427  if (MDNode *LoopID =
428  PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
429  if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
430  return true;
431  }
432  }
433  }
434  return false;
435 }
436 
// Hook run at the start of each machine basic block. The visible part emits a
// `.pragma "nounroll";` line when MBB is the header of a loop whose unrolling
// is disabled.
// NOTE(review): listing line 438, the first statement of the body, is missing
// from this copy of the file.
437 void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
439  if (isLoopHeaderOfNoUnroll(MBB))
440  OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
441 }
442 
443 void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
444  SmallString<128> Str;
445  raw_svector_ostream O(Str);
446 
447  if (!GlobalsEmitted) {
448  emitGlobals(*MF->getFunction().getParent());
449  GlobalsEmitted = true;
450  }
451 
452  // Set up
453  MRI = &MF->getRegInfo();
454  F = &MF->getFunction();
455  emitLinkageDirective(F, O);
456  if (isKernelFunction(*F))
457  O << ".entry ";
458  else {
459  O << ".func ";
460  printReturnValStr(*MF, O);
461  }
462 
463  CurrentFnSym->print(O, MAI);
464 
465  emitFunctionParamList(*MF, O);
466 
467  if (isKernelFunction(*F))
468  emitKernelFunctionDirectives(*F, O);
469 
470  OutStreamer->EmitRawText(O.str());
471 
472  VRegMapping.clear();
473  // Emit open brace for function body.
474  OutStreamer->EmitRawText(StringRef("{\n"));
475  setAndEmitFunctionVirtualRegisters(*MF);
476 }
477 
// NOTE(review): the signature (listing line 478) is missing from this copy of
// the file; the body suggests this is the runOnMachineFunction override taking
// a machine function F (TODO confirm).
// Runs the base-class implementation, then emits the closing brace of the
// function body and forwards the base-class result.
479  bool Result = AsmPrinter::runOnMachineFunction(F);
480  // Emit closing brace for the body of function F.
481  // The closing brace must be emitted here because we need to emit additional
482  // debug labels/data after the last basic block.
483  // We need to emit the closing brace here because we don't have function that
484  // finished emission of the function body.
485  OutStreamer->EmitRawText(StringRef("}\n"));
486  return Result;
487 }
488 
489 void NVPTXAsmPrinter::EmitFunctionBodyStart() {
490  SmallString<128> Str;
491  raw_svector_ostream O(Str);
492  emitDemotedVars(&MF->getFunction(), O);
493  OutStreamer->EmitRawText(O.str());
494 }
495 
496 void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
497  VRegMapping.clear();
498 }
499 
// NOTE(review): the signature (listing line 500) and the statement that fills
// Str (line 502) are missing from this copy of the file, so the name that is
// built cannot be determined here.
// The visible part returns the MCSymbol that OutContext holds (or creates) for
// the string in Str.
501  SmallString<128> Str;
503  return OutContext.getOrCreateSymbol(Str);
504 }
505 
// Attach an "implicit-def: <register name>" comment to the output for the
// register in operand 0 of MI, then add a blank line.
// The first branch names the register via getVirtualRegisterName(); the else
// branch asks the subtarget's register info for the name.
// NOTE(review): listing line 508, the `if` condition that selects between the
// two branches, is missing from this copy of the file.
506 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
507  unsigned RegNo = MI->getOperand(0).getReg();
509  OutStreamer->AddComment(Twine("implicit-def: ") +
510  getVirtualRegisterName(RegNo));
511  } else {
512  const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
513  OutStreamer->AddComment(Twine("implicit-def: ") +
514  STI.getRegisterInfo()->getName(RegNo));
515  }
516  OutStreamer->AddBlankLine();
517 }
518 
519 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
520  raw_ostream &O) const {
521  // If the NVVM IR has some of reqntid* specified, then output
522  // the reqntid directive, and set the unspecified ones to 1.
523  // If none of reqntid* is specified, don't output reqntid directive.
524  unsigned reqntidx, reqntidy, reqntidz;
525  bool specified = false;
526  if (!getReqNTIDx(F, reqntidx))
527  reqntidx = 1;
528  else
529  specified = true;
530  if (!getReqNTIDy(F, reqntidy))
531  reqntidy = 1;
532  else
533  specified = true;
534  if (!getReqNTIDz(F, reqntidz))
535  reqntidz = 1;
536  else
537  specified = true;
538 
539  if (specified)
540  O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
541  << "\n";
542 
543  // If the NVVM IR has some of maxntid* specified, then output
544  // the maxntid directive, and set the unspecified ones to 1.
545  // If none of maxntid* is specified, don't output maxntid directive.
546  unsigned maxntidx, maxntidy, maxntidz;
547  specified = false;
548  if (!getMaxNTIDx(F, maxntidx))
549  maxntidx = 1;
550  else
551  specified = true;
552  if (!getMaxNTIDy(F, maxntidy))
553  maxntidy = 1;
554  else
555  specified = true;
556  if (!getMaxNTIDz(F, maxntidz))
557  maxntidz = 1;
558  else
559  specified = true;
560 
561  if (specified)
562  O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
563  << "\n";
564 
565  unsigned mincta;
566  if (getMinCTASm(F, mincta))
567  O << ".minnctapersm " << mincta << "\n";
568 
569  unsigned maxnreg;
570  if (getMaxNReg(F, maxnreg))
571  O << ".maxnreg " << maxnreg << "\n";
572 }
573 
// NOTE(review): the line carrying the function name and parameters (listing
// line 575) is missing from this copy of the file; callers in this file use
// getVirtualRegisterName(Reg), which matches this body (TODO confirm).
//
// Builds the printed name of register Reg: the register-class string for its
// class followed by the number stored for Reg in VRegMapping. Both lookups
// are guarded only by asserts.
574 std::string
576  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
577 
578  std::string Name;
579  raw_string_ostream NameStr(Name);
580 
581  VRegRCMap::const_iterator I = VRegMapping.find(RC);
582  assert(I != VRegMapping.end() && "Bad register class");
583  const DenseMap<unsigned, unsigned> &RegMap = I->second;
584 
585  VRegMap::const_iterator VI = RegMap.find(Reg);
586  assert(VI != RegMap.end() && "Bad virtual register");
587  unsigned MappedVR = VI->second;
588 
589  NameStr << getNVPTXRegClassStr(RC) << MappedVR;
590 
// Flush so that Name holds the complete text before it is returned.
591  NameStr.flush();
592  return Name;
593 }
594 
595 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
596  raw_ostream &O) {
597  O << getVirtualRegisterName(vr);
598 }
599 
600 void NVPTXAsmPrinter::printVecModifiedImmediate(
601  const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
602  static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
603  int Imm = (int) MO.getImm();
604  if (0 == strcmp(Modifier, "vecelem"))
605  O << "_" << vecelem[Imm];
606  else if (0 == strcmp(Modifier, "vecv4comm1")) {
607  if ((Imm < 0) || (Imm > 3))
608  O << "//";
609  } else if (0 == strcmp(Modifier, "vecv4comm2")) {
610  if ((Imm < 4) || (Imm > 7))
611  O << "//";
612  } else if (0 == strcmp(Modifier, "vecv4pos")) {
613  if (Imm < 0)
614  Imm = 0;
615  O << "_" << vecelem[Imm % 4];
616  } else if (0 == strcmp(Modifier, "vecv2comm1")) {
617  if ((Imm < 0) || (Imm > 1))
618  O << "//";
619  } else if (0 == strcmp(Modifier, "vecv2comm2")) {
620  if ((Imm < 2) || (Imm > 3))
621  O << "//";
622  } else if (0 == strcmp(Modifier, "vecv2pos")) {
623  if (Imm < 0)
624  Imm = 0;
625  O << "_" << vecelem[Imm % 2];
626  } else
627  llvm_unreachable("Unknown Modifier on immediate operand");
628 }
629 
630 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
631  emitLinkageDirective(F, O);
632  if (isKernelFunction(*F))
633  O << ".entry ";
634  else
635  O << ".func ";
636  printReturnValStr(F, O);
637  getSymbol(F)->print(O, MAI);
638  O << "\n";
639  emitFunctionParamList(F, O);
640  O << ";\n";
641 }
642 
643 static bool usedInGlobalVarDef(const Constant *C) {
644  if (!C)
645  return false;
646 
647  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
648  return GV->getName() != "llvm.used";
649  }
650 
651  for (const User *U : C->users())
652  if (const Constant *C = dyn_cast<Constant>(U))
653  if (usedInGlobalVarDef(C))
654  return true;
655 
656  return false;
657 }
658 
659 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
660  if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
661  if (othergv->getName() == "llvm.used")
662  return true;
663  }
664 
665  if (const Instruction *instr = dyn_cast<Instruction>(U)) {
666  if (instr->getParent() && instr->getParent()->getParent()) {
667  const Function *curFunc = instr->getParent()->getParent();
668  if (oneFunc && (curFunc != oneFunc))
669  return false;
670  oneFunc = curFunc;
671  return true;
672  } else
673  return false;
674  }
675 
676  for (const User *UU : U->users())
677  if (!usedInOneFunc(UU, oneFunc))
678  return false;
679 
680  return true;
681 }
682 
683 /* Find out if a global variable can be demoted to local scope.
684  * Currently, this is valid for CUDA shared variables, which have local
685  * scope and global lifetime. So the conditions to check are :
686  * 1. Is the global variable in shared address space?
687  * 2. Does it have internal linkage?
688  * 3. Is the global variable referenced only in one function?
689  */
// On success the single using function is stored in f and true is returned;
// f is left untouched on every false return.
// NOTE(review): listing line 694, the `if` that guards the second
// `return false` (presumably the address-space test of condition 1, using
// Pty), is missing from this copy of the file.
690 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
691  if (!gv->hasInternalLinkage())
692  return false;
693  PointerType *Pty = gv->getType();
695  return false;
696 
697  const Function *oneFunc = nullptr;
698 
699  bool flag = usedInOneFunc(gv, oneFunc);
700  if (!flag)
701  return false;
// No instruction user was found at all, so there is no function to demote to.
702  if (!oneFunc)
703  return false;
704  f = oneFunc;
705  return true;
706 }
707 
// Return true if C is used, directly or through other constants, by an
// instruction whose parent function is already a key of seenMap.
// Instructions that are not attached to a basic block or function are
// ignored.
// NOTE(review): listing line 709, which declares the seenMap parameter, is
// missing from this copy of the file.
708 static bool useFuncSeen(const Constant *C,
710  for (const User *U : C->users()) {
711  if (const Constant *cu = dyn_cast<Constant>(U)) {
712  if (useFuncSeen(cu, seenMap))
713  return true;
714  } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
715  const BasicBlock *bb = I->getParent();
716  if (!bb)
717  continue;
718  const Function *caller = bb->getParent();
719  if (!caller)
720  continue;
721  if (seenMap.find(caller) != seenMap.end())
722  return true;
723  }
724  }
725  return false;
726 }
727 
// Print forward declarations to O for the functions of M that need one.
// Functions are visited in module order and recorded in seenMap once handled.
// A declaration is printed when:
//  - the function has the "nvptx-libcall-callee" attribute;
//  - it is a used, non-intrinsic declaration;
//  - it is referenced from a global variable definition; or
//  - one of its users lives in a function that was visited earlier.
// NOTE(review): listing line 729, which declares seenMap, is missing from this
// copy of the file.
728 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
730  for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
731  const Function *F = &*FI;
732 
733  if (F->getAttributes().hasFnAttribute("nvptx-libcall-callee")) {
734  emitDeclaration(F, O);
735  continue;
736  }
737 
// Declarations are not recorded in seenMap: every path below continues.
738  if (F->isDeclaration()) {
739  if (F->use_empty())
740  continue;
741  if (F->getIntrinsicID())
742  continue;
743  emitDeclaration(F, O);
744  continue;
745  }
// Definition: look for a use that precedes it; at most one declaration is
// printed because each emit is followed by a break.
746  for (const User *U : F->users()) {
747  if (const Constant *C = dyn_cast<Constant>(U)) {
748  if (usedInGlobalVarDef(C)) {
749  // The use is in the initialization of a global variable
750  // that is a function pointer, so print a declaration
751  // for the original function
752  emitDeclaration(F, O);
753  break;
754  }
755  // Emit a declaration of this function if the function that
756  // uses this constant expr has already been seen.
757  if (useFuncSeen(C, seenMap)) {
758  emitDeclaration(F, O);
759  break;
760  }
761  }
762 
763  if (!isa<Instruction>(U))
764  continue;
765  const Instruction *instr = cast<Instruction>(U);
766  const BasicBlock *bb = instr->getParent();
767  if (!bb)
768  continue;
769  const Function *caller = bb->getParent();
770  if (!caller)
771  continue;
772 
773  // If a caller has already been seen, then the caller is
774  // appearing in the module before the callee. so print out
775  // a declaration for the callee.
776  if (seenMap.find(caller) != seenMap.end()) {
777  emitDeclaration(F, O);
778  break;
779  }
780  }
781  seenMap[F] = true;
782  }
783 }
784 
// NOTE(review): the signature (listing line 785) is missing from this copy of
// the file; the module-initialisation code in this file calls
// isEmptyXXStructor(GV) on the llvm.global_ctors / llvm.global_dtors globals,
// which matches this body (TODO confirm).
//
// Returns true when GV is null, when its initializer is not a ConstantArray,
// or when that array has no operands.
786  if (!GV) return true;
787  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
788  if (!InitList) return true; // Not an array; we don't know how to parse.
789  return InitList->getNumOperands() == 0;
790 }
791 
// NOTE(review): the signature (listing line 792) and the two
// `report_fatal_error(` opening lines (804 and 809) are missing from this copy
// of the file; the body suggests this is the doInitialization override taking
// module M (TODO confirm).
//
// Rejects modules with aliases or non-trivial global ctors/dtors, runs the
// base-class initialisation, emits the PTX header and any module-level inline
// assembly, and resets GlobalsEmitted. Returns the base-class result.
793  // Construct a default subtarget off of the TargetMachine defaults. The
794  // rest of NVPTX isn't friendly to change subtargets per function and
795  // so the default TargetMachine will have all of the options.
796  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
797  const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
798 
799  if (M.alias_size()) {
800  report_fatal_error("Module has aliases, which NVPTX does not support.");
801  return true; // error
802  }
803  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
805  "Module has a nontrivial global ctor, which NVPTX does not support.");
806  return true; // error
807  }
808  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
810  "Module has a nontrivial global dtor, which NVPTX does not support.");
811  return true; // error
812  }
813 
814  SmallString<128> Str1;
815  raw_svector_ostream OS1(Str1);
816 
817  // We need to call the parent's one explicitly.
818  bool Result = AsmPrinter::doInitialization(M);
819 
820  // Emit header before any dwarf directives are emitted below.
821  emitHeader(M, OS1, *STI);
822  OutStreamer->EmitRawText(OS1.str());
823 
824  // Emit module-level inline asm if it exists.
825  if (!M.getModuleInlineAsm().empty()) {
826  OutStreamer->AddComment("Start of file scope inline assembly");
827  OutStreamer->AddBlankLine();
828  OutStreamer->EmitRawText(StringRef(M.getModuleInlineAsm()));
829  OutStreamer->AddBlankLine();
830  OutStreamer->AddComment("End of file scope inline assembly");
831  OutStreamer->AddBlankLine();
832  }
833 
// Globals are emitted later, not during initialisation.
834  GlobalsEmitted = false;
835 
836  return Result;
837 }
838 
// Emit the function declarations and module-level global variables of M as
// one block of raw text.
// NOTE(review): listing lines 850-852, which declare Globals, GVVisited and
// GVVisiting, are missing from this copy of the file.
839 void NVPTXAsmPrinter::emitGlobals(const Module &M) {
840  SmallString<128> Str2;
841  raw_svector_ostream OS2(Str2);
842 
843  emitDeclarations(M, OS2);
844 
845  // As ptxas does not support forward references of globals, we need to first
846  // sort the list of module-level globals in def-use order. We visit each
847  // global variable in order, and ensure that we emit it *after* its dependent
848  // globals. We use a little extra memory maintaining both a set and a list to
849  // have fast searches while maintaining a strict ordering.
853 
854  // Visit each global variable, in order
855  for (const GlobalVariable &I : M.globals())
856  VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
857 
// Sanity checks: every global was ordered and none is left half-processed.
858  assert(GVVisited.size() == M.getGlobalList().size() &&
859  "Missed a global variable");
860  assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
861 
862  // Print out module-level global variables in proper order
863  for (unsigned i = 0, e = Globals.size(); i != e; ++i)
864  printModuleLevelGV(Globals[i], OS2);
865 
866  OS2 << '\n';
867 
868  OutStreamer->EmitRawText(OS2.str());
869 }
870 
// Write the PTX file header for M to O: a banner comment, the ".version"
// line, the ".target" line and the ".address_size" line.
// NOTE(review): the `case` labels of the switch below (listing lines 891-892
// and 894-895) are missing from this copy of the file.
871 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
872  const NVPTXSubtarget &STI) {
873  O << "//\n";
874  O << "// Generated by LLVM NVPTX Back-End\n";
875  O << "//\n";
876  O << "\n";
877 
// The version is printed as PTXVersion/10 "." PTXVersion%10.
878  unsigned PTXVersion = STI.getPTXVersion();
879  O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
880 
881  O << ".target ";
882  O << STI.getTargetName();
883 
// The NVCL driver interface additionally gets "texmode_independent".
884  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
885  if (NTM.getDrvInterface() == NVPTX::NVCL)
886  O << ", texmode_independent";
887 
// Scan the compile units; stop at the first one that sets HasFullDebugInfo.
888  bool HasFullDebugInfo = false;
889  for (DICompileUnit *CU : M.debug_compile_units()) {
890  switch(CU->getEmissionKind()) {
893  break;
896  HasFullDebugInfo = true;
897  break;
898  }
899  if (HasFullDebugInfo)
900  break;
901  }
902  // FIXME: remove comment once debug info is properly supported.
// Note that the ", debug" target option is written behind "//", i.e. as a
// PTX comment.
903  if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
904  O << "//, debug";
905 
906  O << "\n";
907 
908  O << ".address_size ";
909  if (NTM.is64Bit())
910  O << "64";
911  else
912  O << "32";
913  O << "\n";
914 
915  O << "\n";
916 }
917 
// NOTE(review): the signature (listing line 918) and one statement after the
// restore loop (line 952) are missing from this copy of the file; the body
// suggests this is the doFinalization override taking module M (TODO confirm).
//
// Emits the globals if no function triggered that yet, then runs the
// base-class finalisation with the module's global list temporarily emptied,
// restores the list, and finally writes the trailing debug text and DWARF
// file directives. Returns the base-class result.
919  bool HasDebugInfo = MMI && MMI->hasDebugInfo();
920 
921  // If we did not emit any functions, then the global declarations have not
922  // yet been emitted.
923  if (!GlobalsEmitted) {
924  emitGlobals(M);
925  GlobalsEmitted = true;
926  }
927 
928  // XXX Temporarily remove global variables so that doFinalization() will not
929  // emit them again (global variables are emitted at beginning).
930 
931  Module::GlobalListType &global_list = M.getGlobalList();
932  int i, n = global_list.size();
// Manually managed backup array; it is released with delete[] after the
// globals have been re-inserted.
933  GlobalVariable **gv_array = new GlobalVariable *[n];
934 
935  // first, back-up GlobalVariable in gv_array
936  i = 0;
937  for (Module::global_iterator I = global_list.begin(), E = global_list.end();
938  I != E; ++I)
939  gv_array[i++] = &*I;
940 
941  // second, empty global_list
942  while (!global_list.empty())
943  global_list.remove(global_list.begin());
944 
945  // call doFinalization
946  bool ret = AsmPrinter::doFinalization(M);
947 
948  // now we restore global variables
949  for (i = 0; i < n; i++)
950  global_list.insert(global_list.end(), gv_array[i]);
951 
953 
954  delete[] gv_array;
955  // FIXME: remove comment once debug info is properly supported.
956  // Close the last emitted section
957  if (HasDebugInfo)
958  OutStreamer->EmitRawText("//\t}");
959 
960  // Output last DWARF .file directives, if any.
961  static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
962  ->outputDwarfFileDirectives();
963 
964  return ret;
965 
966  //bool Result = AsmPrinter::doFinalization(M);
967  // Instead of calling the parents doFinalization, we may
968  // clone parents doFinalization and customize here.
969  // Currently, we if NVISA out the EmitGlobals() in
970  // parent's doFinalization, which is too intrusive.
971  //
972  // Same for the doInitialization.
973  //return Result;
974 }
975 
976 // This function emits appropriate linkage directives for
977 // functions and global variables.
978 //
979 // extern function declaration -> .extern
980 // extern function definition -> .visible
981 // external global variable with init -> .visible
982 // external without init -> .extern
983 // appending -> not allowed, assert.
984 // for any linkage other than
985 // internal, private, linker_private,
986 // linker_private_weak, linker_private_weak_def_auto,
987 // we emit -> .weak.
988 
989 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
990  raw_ostream &O) {
991  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
992  if (V->hasExternalLinkage()) {
993  if (isa<GlobalVariable>(V)) {
994  const GlobalVariable *GVar = cast<GlobalVariable>(V);
995  if (GVar) {
996  if (GVar->hasInitializer())
997  O << ".visible ";
998  else
999  O << ".extern ";
1000  }
1001  } else if (V->isDeclaration())
1002  O << ".extern ";
1003  else
1004  O << ".visible ";
1005  } else if (V->hasAppendingLinkage()) {
1006  std::string msg;
1007  msg.append("Error: ");
1008  msg.append("Symbol ");
1009  if (V->hasName())
1010  msg.append(V->getName());
1011  msg.append("has unsupported appending linkage type");
1012  llvm_unreachable(msg.c_str());
1013  } else if (!V->hasInternalLinkage() &&
1014  !V->hasPrivateLinkage()) {
1015  O << ".weak ";
1016  }
1017  }
1018 }
1019 
1020 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
1021  raw_ostream &O,
1022  bool processDemoted) {
1023  // Skip meta data
1024  if (GVar->hasSection()) {
1025  if (GVar->getSection() == "llvm.metadata")
1026  return;
1027  }
1028 
1029  // Skip LLVM intrinsic global variables
1030  if (GVar->getName().startswith("llvm.") ||
1031  GVar->getName().startswith("nvvm."))
1032  return;
1033 
1034  const DataLayout &DL = getDataLayout();
1035 
1036  // GlobalVariables are always constant pointers themselves.
1037  PointerType *PTy = GVar->getType();
1038  Type *ETy = GVar->getValueType();
1039 
1040  if (GVar->hasExternalLinkage()) {
1041  if (GVar->hasInitializer())
1042  O << ".visible ";
1043  else
1044  O << ".extern ";
1045  } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
1047  GVar->hasCommonLinkage()) {
1048  O << ".weak ";
1049  }
1050 
1051  if (isTexture(*GVar)) {
1052  O << ".global .texref " << getTextureName(*GVar) << ";\n";
1053  return;
1054  }
1055 
1056  if (isSurface(*GVar)) {
1057  O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1058  return;
1059  }
1060 
1061  if (GVar->isDeclaration()) {
1062  // (extern) declarations, no definition or initializer
1063  // Currently the only known declaration is for an automatic __local
1064  // (.shared) promoted to global.
1065  emitPTXGlobalVariable(GVar, O);
1066  O << ";\n";
1067  return;
1068  }
1069 
1070  if (isSampler(*GVar)) {
1071  O << ".global .samplerref " << getSamplerName(*GVar);
1072 
1073  const Constant *Initializer = nullptr;
1074  if (GVar->hasInitializer())
1075  Initializer = GVar->getInitializer();
1076  const ConstantInt *CI = nullptr;
1077  if (Initializer)
1078  CI = dyn_cast<ConstantInt>(Initializer);
1079  if (CI) {
1080  unsigned sample = CI->getZExtValue();
1081 
1082  O << " = { ";
1083 
1084  for (int i = 0,
1085  addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1086  i < 3; i++) {
1087  O << "addr_mode_" << i << " = ";
1088  switch (addr) {
1089  case 0:
1090  O << "wrap";
1091  break;
1092  case 1:
1093  O << "clamp_to_border";
1094  break;
1095  case 2:
1096  O << "clamp_to_edge";
1097  break;
1098  case 3:
1099  O << "wrap";
1100  break;
1101  case 4:
1102  O << "mirror";
1103  break;
1104  }
1105  O << ", ";
1106  }
1107  O << "filter_mode = ";
1108  switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1109  case 0:
1110  O << "nearest";
1111  break;
1112  case 1:
1113  O << "linear";
1114  break;
1115  case 2:
1116  llvm_unreachable("Anisotropic filtering is not supported");
1117  default:
1118  O << "nearest";
1119  break;
1120  }
1121  if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1122  O << ", force_unnormalized_coords = 1";
1123  }
1124  O << " }";
1125  }
1126 
1127  O << ";\n";
1128  return;
1129  }
1130 
1131  if (GVar->hasPrivateLinkage()) {
1132  if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1133  return;
1134 
1135  // FIXME - need better way (e.g. Metadata) to avoid generating this global
1136  if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1137  return;
1138  if (GVar->use_empty())
1139  return;
1140  }
1141 
1142  const Function *demotedFunc = nullptr;
1143  if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1144  O << "// " << GVar->getName() << " has been demoted\n";
1145  if (localDecls.find(demotedFunc) != localDecls.end())
1146  localDecls[demotedFunc].push_back(GVar);
1147  else {
1148  std::vector<const GlobalVariable *> temp;
1149  temp.push_back(GVar);
1150  localDecls[demotedFunc] = temp;
1151  }
1152  return;
1153  }
1154 
1155  O << ".";
1156  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1157 
1158  if (isManaged(*GVar)) {
1159  O << " .attribute(.managed)";
1160  }
1161 
1162  if (GVar->getAlignment() == 0)
1163  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1164  else
1165  O << " .align " << GVar->getAlignment();
1166 
1167  if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
1168  (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
1169  O << " .";
1170  // Special case: ABI requires that we use .u8 for predicates
1171  if (ETy->isIntegerTy(1))
1172  O << "u8";
1173  else
1174  O << getPTXFundamentalTypeStr(ETy, false);
1175  O << " ";
1176  getSymbol(GVar)->print(O, MAI);
1177 
1178  // Ptx allows variable initilization only for constant and global state
1179  // spaces.
1180  if (GVar->hasInitializer()) {
1181  if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1182  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1183  const Constant *Initializer = GVar->getInitializer();
1184  // 'undef' is treated as there is no value specified.
1185  if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1186  O << " = ";
1187  printScalarConstant(Initializer, O);
1188  }
1189  } else {
1190  // The frontend adds zero-initializer to device and constant variables
1191  // that don't have an initial value, and UndefValue to shared
1192  // variables, so skip warning for this case.
1193  if (!GVar->getInitializer()->isNullValue() &&
1194  !isa<UndefValue>(GVar->getInitializer())) {
1195  report_fatal_error("initial value of '" + GVar->getName() +
1196  "' is not allowed in addrspace(" +
1197  Twine(PTy->getAddressSpace()) + ")");
1198  }
1199  }
1200  }
1201  } else {
1202  unsigned int ElementSize = 0;
1203 
1204  // Although PTX has direct support for struct type and array type and
1205  // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1206  // targets that support these high level field accesses. Structs, arrays
1207  // and vectors are lowered into arrays of bytes.
1208  switch (ETy->getTypeID()) {
1209  case Type::IntegerTyID: // Integers larger than 64 bits
1210  case Type::StructTyID:
1211  case Type::ArrayTyID:
1212  case Type::VectorTyID:
1213  ElementSize = DL.getTypeStoreSize(ETy);
1214  // Ptx allows variable initilization only for constant and
1215  // global state spaces.
1216  if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1217  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1218  GVar->hasInitializer()) {
1219  const Constant *Initializer = GVar->getInitializer();
1220  if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1221  AggBuffer aggBuffer(ElementSize, O, *this);
1222  bufferAggregateConstant(Initializer, &aggBuffer);
1223  if (aggBuffer.numSymbols) {
1224  if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
1225  O << " .u64 ";
1226  getSymbol(GVar)->print(O, MAI);
1227  O << "[";
1228  O << ElementSize / 8;
1229  } else {
1230  O << " .u32 ";
1231  getSymbol(GVar)->print(O, MAI);
1232  O << "[";
1233  O << ElementSize / 4;
1234  }
1235  O << "]";
1236  } else {
1237  O << " .b8 ";
1238  getSymbol(GVar)->print(O, MAI);
1239  O << "[";
1240  O << ElementSize;
1241  O << "]";
1242  }
1243  O << " = {";
1244  aggBuffer.print();
1245  O << "}";
1246  } else {
1247  O << " .b8 ";
1248  getSymbol(GVar)->print(O, MAI);
1249  if (ElementSize) {
1250  O << "[";
1251  O << ElementSize;
1252  O << "]";
1253  }
1254  }
1255  } else {
1256  O << " .b8 ";
1257  getSymbol(GVar)->print(O, MAI);
1258  if (ElementSize) {
1259  O << "[";
1260  O << ElementSize;
1261  O << "]";
1262  }
1263  }
1264  break;
1265  default:
1266  llvm_unreachable("type not supported yet");
1267  }
1268  }
1269  O << ";\n";
1270 }
1271 
1272 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1273  if (localDecls.find(f) == localDecls.end())
1274  return;
1275 
1276  std::vector<const GlobalVariable *> &gvars = localDecls[f];
1277 
1278  for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
1279  O << "\t// demoted variable\n\t";
1280  printModuleLevelGV(gvars[i], O, true);
1281  }
1282 }
1283 
1284 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1285  raw_ostream &O) const {
1286  switch (AddressSpace) {
1287  case ADDRESS_SPACE_LOCAL:
1288  O << "local";
1289  break;
1290  case ADDRESS_SPACE_GLOBAL:
1291  O << "global";
1292  break;
1293  case ADDRESS_SPACE_CONST:
1294  O << "const";
1295  break;
1296  case ADDRESS_SPACE_SHARED:
1297  O << "shared";
1298  break;
1299  default:
1300  report_fatal_error("Bad address space found while emitting PTX: " +
1302  break;
1303  }
1304 }
1305 
1306 std::string
1307 NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1308  switch (Ty->getTypeID()) {
1309  default:
1310  llvm_unreachable("unexpected type");
1311  break;
1312  case Type::IntegerTyID: {
1313  unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1314  if (NumBits == 1)
1315  return "pred";
1316  else if (NumBits <= 64) {
1317  std::string name = "u";
1318  return name + utostr(NumBits);
1319  } else {
1320  llvm_unreachable("Integer too large");
1321  break;
1322  }
1323  break;
1324  }
1325  case Type::HalfTyID:
1326  // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
1327  return "b16";
1328  case Type::FloatTyID:
1329  return "f32";
1330  case Type::DoubleTyID:
1331  return "f64";
1332  case Type::PointerTyID:
1333  if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
1334  if (useB4PTR)
1335  return "b64";
1336  else
1337  return "u64";
1338  else if (useB4PTR)
1339  return "b32";
1340  else
1341  return "u32";
1342  }
1343  llvm_unreachable("unexpected type");
1344  return nullptr;
1345 }
1346 
1347 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1348  raw_ostream &O) {
1349  const DataLayout &DL = getDataLayout();
1350 
1351  // GlobalVariables are always constant pointers themselves.
1352  Type *ETy = GVar->getValueType();
1353 
1354  O << ".";
1355  emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1356  if (GVar->getAlignment() == 0)
1357  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1358  else
1359  O << " .align " << GVar->getAlignment();
1360 
1361  // Special case for i128
1362  if (ETy->isIntegerTy(128)) {
1363  O << " .b8 ";
1364  getSymbol(GVar)->print(O, MAI);
1365  O << "[16]";
1366  return;
1367  }
1368 
1369  if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1370  O << " .";
1371  O << getPTXFundamentalTypeStr(ETy);
1372  O << " ";
1373  getSymbol(GVar)->print(O, MAI);
1374  return;
1375  }
1376 
1377  int64_t ElementSize = 0;
1378 
1379  // Although PTX has direct support for struct type and array type and LLVM IR
1380  // is very similar to PTX, the LLVM CodeGen does not support for targets that
1381  // support these high level field accesses. Structs and arrays are lowered
1382  // into arrays of bytes.
1383  switch (ETy->getTypeID()) {
1384  case Type::StructTyID:
1385  case Type::ArrayTyID:
1386  case Type::VectorTyID:
1387  ElementSize = DL.getTypeStoreSize(ETy);
1388  O << " .b8 ";
1389  getSymbol(GVar)->print(O, MAI);
1390  O << "[";
1391  if (ElementSize) {
1392  O << ElementSize;
1393  }
1394  O << "]";
1395  break;
1396  default:
1397  llvm_unreachable("type not supported yet");
1398  }
1399 }
1400 
1401 static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
1402  if (Ty->isSingleValueType())
1403  return DL.getPrefTypeAlignment(Ty);
1404 
1405  auto *ATy = dyn_cast<ArrayType>(Ty);
1406  if (ATy)
1407  return getOpenCLAlignment(DL, ATy->getElementType());
1408 
1409  auto *STy = dyn_cast<StructType>(Ty);
1410  if (STy) {
1411  unsigned int alignStruct = 1;
1412  // Go through each element of the struct and find the
1413  // largest alignment.
1414  for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
1415  Type *ETy = STy->getElementType(i);
1416  unsigned int align = getOpenCLAlignment(DL, ETy);
1417  if (align > alignStruct)
1418  alignStruct = align;
1419  }
1420  return alignStruct;
1421  }
1422 
1423  auto *FTy = dyn_cast<FunctionType>(Ty);
1424  if (FTy)
1425  return DL.getPointerPrefAlignment();
1426  return DL.getPrefTypeAlignment(Ty);
1427 }
1428 
1429 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
1430  int paramIndex, raw_ostream &O) {
1431  getSymbol(I->getParent())->print(O, MAI);
1432  O << "_param_" << paramIndex;
1433 }
1434 
1435 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1436  const DataLayout &DL = getDataLayout();
1437  const AttributeList &PAL = F->getAttributes();
1438  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1439  const TargetLowering *TLI = STI.getTargetLowering();
1441  unsigned paramIndex = 0;
1442  bool first = true;
1443  bool isKernelFunc = isKernelFunction(*F);
1444  bool isABI = (STI.getSmVersion() >= 20);
1445  bool hasImageHandles = STI.hasImageHandles();
1446  MVT thePointerTy = TLI->getPointerTy(DL);
1447 
1448  if (F->arg_empty()) {
1449  O << "()\n";
1450  return;
1451  }
1452 
1453  O << "(\n";
1454 
1455  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1456  Type *Ty = I->getType();
1457 
1458  if (!first)
1459  O << ",\n";
1460 
1461  first = false;
1462 
1463  // Handle image/sampler parameters
1464  if (isKernelFunction(*F)) {
1465  if (isSampler(*I) || isImage(*I)) {
1466  if (isImage(*I)) {
1467  std::string sname = I->getName();
1468  if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1469  if (hasImageHandles)
1470  O << "\t.param .u64 .ptr .surfref ";
1471  else
1472  O << "\t.param .surfref ";
1473  CurrentFnSym->print(O, MAI);
1474  O << "_param_" << paramIndex;
1475  }
1476  else { // Default image is read_only
1477  if (hasImageHandles)
1478  O << "\t.param .u64 .ptr .texref ";
1479  else
1480  O << "\t.param .texref ";
1481  CurrentFnSym->print(O, MAI);
1482  O << "_param_" << paramIndex;
1483  }
1484  } else {
1485  if (hasImageHandles)
1486  O << "\t.param .u64 .ptr .samplerref ";
1487  else
1488  O << "\t.param .samplerref ";
1489  CurrentFnSym->print(O, MAI);
1490  O << "_param_" << paramIndex;
1491  }
1492  continue;
1493  }
1494  }
1495 
1496  if (!PAL.hasParamAttribute(paramIndex, Attribute::ByVal)) {
1497  if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
1498  // Just print .param .align <a> .b8 .param[size];
1499  // <a> = PAL.getparamalignment
1500  // size = typeallocsize of element type
1501  unsigned align = PAL.getParamAlignment(paramIndex);
1502  if (align == 0)
1503  align = DL.getABITypeAlignment(Ty);
1504 
1505  unsigned sz = DL.getTypeAllocSize(Ty);
1506  O << "\t.param .align " << align << " .b8 ";
1507  printParamName(I, paramIndex, O);
1508  O << "[" << sz << "]";
1509 
1510  continue;
1511  }
1512  // Just a scalar
1513  auto *PTy = dyn_cast<PointerType>(Ty);
1514  if (isKernelFunc) {
1515  if (PTy) {
1516  // Special handling for pointer arguments to kernel
1517  O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
1518 
1519  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1520  NVPTX::CUDA) {
1521  Type *ETy = PTy->getElementType();
1522  int addrSpace = PTy->getAddressSpace();
1523  switch (addrSpace) {
1524  default:
1525  O << ".ptr ";
1526  break;
1527  case ADDRESS_SPACE_CONST:
1528  O << ".ptr .const ";
1529  break;
1530  case ADDRESS_SPACE_SHARED:
1531  O << ".ptr .shared ";
1532  break;
1533  case ADDRESS_SPACE_GLOBAL:
1534  O << ".ptr .global ";
1535  break;
1536  }
1537  O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
1538  }
1539  printParamName(I, paramIndex, O);
1540  continue;
1541  }
1542 
1543  // non-pointer scalar to kernel func
1544  O << "\t.param .";
1545  // Special case: predicate operands become .u8 types
1546  if (Ty->isIntegerTy(1))
1547  O << "u8";
1548  else
1549  O << getPTXFundamentalTypeStr(Ty);
1550  O << " ";
1551  printParamName(I, paramIndex, O);
1552  continue;
1553  }
1554  // Non-kernel function, just print .param .b<size> for ABI
1555  // and .reg .b<size> for non-ABI
1556  unsigned sz = 0;
1557  if (isa<IntegerType>(Ty)) {
1558  sz = cast<IntegerType>(Ty)->getBitWidth();
1559  if (sz < 32)
1560  sz = 32;
1561  } else if (isa<PointerType>(Ty))
1562  sz = thePointerTy.getSizeInBits();
1563  else if (Ty->isHalfTy())
1564  // PTX ABI requires all scalar parameters to be at least 32
1565  // bits in size. fp16 normally uses .b16 as its storage type
1566  // in PTX, so its size must be adjusted here, too.
1567  sz = 32;
1568  else
1569  sz = Ty->getPrimitiveSizeInBits();
1570  if (isABI)
1571  O << "\t.param .b" << sz << " ";
1572  else
1573  O << "\t.reg .b" << sz << " ";
1574  printParamName(I, paramIndex, O);
1575  continue;
1576  }
1577 
1578  // param has byVal attribute. So should be a pointer
1579  auto *PTy = dyn_cast<PointerType>(Ty);
1580  assert(PTy && "Param with byval attribute should be a pointer type");
1581  Type *ETy = PTy->getElementType();
1582 
1583  if (isABI || isKernelFunc) {
1584  // Just print .param .align <a> .b8 .param[size];
1585  // <a> = PAL.getparamalignment
1586  // size = typeallocsize of element type
1587  unsigned align = PAL.getParamAlignment(paramIndex);
1588  if (align == 0)
1589  align = DL.getABITypeAlignment(ETy);
1590  // Work around a bug in ptxas. When PTX code takes address of
1591  // byval parameter with alignment < 4, ptxas generates code to
1592  // spill argument into memory. Alas on sm_50+ ptxas generates
1593  // SASS code that fails with misaligned access. To work around
1594  // the problem, make sure that we align byval parameters by at
1595  // least 4. Matching change must be made in LowerCall() where we
1596  // prepare parameters for the call.
1597  //
1598  // TODO: this will need to be undone when we get to support multi-TU
1599  // device-side compilation as it breaks ABI compatibility with nvcc.
1600  // Hopefully ptxas bug is fixed by then.
1601  if (!isKernelFunc && align < 4)
1602  align = 4;
1603  unsigned sz = DL.getTypeAllocSize(ETy);
1604  O << "\t.param .align " << align << " .b8 ";
1605  printParamName(I, paramIndex, O);
1606  O << "[" << sz << "]";
1607  continue;
1608  } else {
1609  // Split the ETy into constituent parts and
1610  // print .param .b<size> <name> for each part.
1611  // Further, if a part is vector, print the above for
1612  // each vector element.
1613  SmallVector<EVT, 16> vtparts;
1614  ComputeValueVTs(*TLI, DL, ETy, vtparts);
1615  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1616  unsigned elems = 1;
1617  EVT elemtype = vtparts[i];
1618  if (vtparts[i].isVector()) {
1619  elems = vtparts[i].getVectorNumElements();
1620  elemtype = vtparts[i].getVectorElementType();
1621  }
1622 
1623  for (unsigned j = 0, je = elems; j != je; ++j) {
1624  unsigned sz = elemtype.getSizeInBits();
1625  if (elemtype.isInteger() && (sz < 32))
1626  sz = 32;
1627  O << "\t.reg .b" << sz << " ";
1628  printParamName(I, paramIndex, O);
1629  if (j < je - 1)
1630  O << ",\n";
1631  ++paramIndex;
1632  }
1633  if (i < e - 1)
1634  O << ",\n";
1635  }
1636  --paramIndex;
1637  continue;
1638  }
1639  }
1640 
1641  O << "\n)\n";
1642 }
1643 
1644 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
1645  raw_ostream &O) {
1646  const Function &F = MF.getFunction();
1647  emitFunctionParamList(&F, O);
1648 }
1649 
1650 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1651  const MachineFunction &MF) {
1652  SmallString<128> Str;
1653  raw_svector_ostream O(Str);
1654 
1655  // Map the global virtual register number to a register class specific
1656  // virtual register number starting from 1 with that class.
1658  //unsigned numRegClasses = TRI->getNumRegClasses();
1659 
1660  // Emit the Fake Stack Object
1661  const MachineFrameInfo &MFI = MF.getFrameInfo();
1662  int NumBytes = (int) MFI.getStackSize();
1663  if (NumBytes) {
1664  O << "\t.local .align " << MFI.getMaxAlignment() << " .b8 \t" << DEPOTNAME
1665  << getFunctionNumber() << "[" << NumBytes << "];\n";
1666  if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1667  O << "\t.reg .b64 \t%SP;\n";
1668  O << "\t.reg .b64 \t%SPL;\n";
1669  } else {
1670  O << "\t.reg .b32 \t%SP;\n";
1671  O << "\t.reg .b32 \t%SPL;\n";
1672  }
1673  }
1674 
1675  // Go through all virtual registers to establish the mapping between the
1676  // global virtual
1677  // register number and the per class virtual register number.
1678  // We use the per class virtual register number in the ptx output.
1679  unsigned int numVRs = MRI->getNumVirtRegs();
1680  for (unsigned i = 0; i < numVRs; i++) {
1681  unsigned int vr = TRI->index2VirtReg(i);
1682  const TargetRegisterClass *RC = MRI->getRegClass(vr);
1683  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1684  int n = regmap.size();
1685  regmap.insert(std::make_pair(vr, n + 1));
1686  }
1687 
1688  // Emit register declarations
1689  // @TODO: Extract out the real register usage
1690  // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1691  // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1692  // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1693  // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1694  // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1695  // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1696  // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1697 
1698  // Emit declaration of the virtual registers or 'physical' registers for
1699  // each register class
1700  for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1701  const TargetRegisterClass *RC = TRI->getRegClass(i);
1702  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1703  std::string rcname = getNVPTXRegClassName(RC);
1704  std::string rcStr = getNVPTXRegClassStr(RC);
1705  int n = regmap.size();
1706 
1707  // Only declare those registers that may be used.
1708  if (n) {
1709  O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1710  << ">;\n";
1711  }
1712  }
1713 
1714  OutStreamer->EmitRawText(O.str());
1715 }
1716 
1717 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1718  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1719  bool ignored;
1720  unsigned int numHex;
1721  const char *lead;
1722 
1723  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1724  numHex = 8;
1725  lead = "0f";
1727  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1728  numHex = 16;
1729  lead = "0d";
1731  } else
1732  llvm_unreachable("unsupported fp type");
1733 
1734  APInt API = APF.bitcastToAPInt();
1735  O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1736 }
1737 
1738 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1739  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1740  O << CI->getValue();
1741  return;
1742  }
1743  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1744  printFPConstant(CFP, O);
1745  return;
1746  }
1747  if (isa<ConstantPointerNull>(CPV)) {
1748  O << "0";
1749  return;
1750  }
1751  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1752  bool IsNonGenericPointer = false;
1753  if (GVar->getType()->getAddressSpace() != 0) {
1754  IsNonGenericPointer = true;
1755  }
1756  if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1757  O << "generic(";
1758  getSymbol(GVar)->print(O, MAI);
1759  O << ")";
1760  } else {
1761  getSymbol(GVar)->print(O, MAI);
1762  }
1763  return;
1764  }
1765  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1766  const Value *v = Cexpr->stripPointerCasts();
1767  PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
1768  bool IsNonGenericPointer = false;
1769  if (PTy && PTy->getAddressSpace() != 0) {
1770  IsNonGenericPointer = true;
1771  }
1772  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1773  if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
1774  O << "generic(";
1775  getSymbol(GVar)->print(O, MAI);
1776  O << ")";
1777  } else {
1778  getSymbol(GVar)->print(O, MAI);
1779  }
1780  return;
1781  } else {
1782  lowerConstant(CPV)->print(O, MAI);
1783  return;
1784  }
1785  }
1786  llvm_unreachable("Not scalar type found in printScalarConstant()");
1787 }
1788 
// These utility functions assure we get the right sequence of bytes for a given
// type even for big-endian machines

/// Store the sizeof(T) low-order bytes of \p val into \p p, least significant
/// byte first. \p p must have room for sizeof(T) bytes.
template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) {
  int64_t Remaining = (int64_t)val;
  unsigned char *const End = p + sizeof(T);
  for (unsigned char *Out = p; Out != End; ++Out) {
    // Truncation keeps the current low byte; the shift exposes the next one.
    *Out = (unsigned char)Remaining;
    Remaining >>= 8;
  }
}
/// Store the 4 bytes of the bit pattern of \p val into \p p, least
/// significant byte first, independent of host byte order. \p p must have
/// room for 4 bytes.
static void ConvertFloatToBytes(unsigned char *p, float val) {
  static_assert(sizeof(float) == sizeof(uint32_t), "unexpected float size");
  // Copy the bits out instead of casting the pointer: reading a float through
  // an integer pointer violates the strict-aliasing rules.
  uint32_t Bits;
  std::memcpy(&Bits, &val, sizeof(Bits));
  for (unsigned i = 0; i < sizeof(Bits); ++i) {
    p[i] = static_cast<unsigned char>(Bits);
    Bits >>= 8;
  }
}
/// Store the 8 bytes of the bit pattern of \p val into \p p, least
/// significant byte first, independent of host byte order. \p p must have
/// room for 8 bytes.
static void ConvertDoubleToBytes(unsigned char *p, double val) {
  static_assert(sizeof(double) == sizeof(uint64_t), "unexpected double size");
  // Copy the bits out instead of casting the pointer: reading a double
  // through an integer pointer violates the strict-aliasing rules.
  uint64_t Bits;
  std::memcpy(&Bits, &val, sizeof(Bits));
  for (unsigned i = 0; i < sizeof(Bits); ++i) {
    p[i] = static_cast<unsigned char>(Bits);
    Bits >>= 8;
  }
}
1812 
1813 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1814  AggBuffer *aggBuffer) {
1815  const DataLayout &DL = getDataLayout();
1816 
1817  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1818  int s = DL.getTypeAllocSize(CPV->getType());
1819  if (s < Bytes)
1820  s = Bytes;
1821  aggBuffer->addZeros(s);
1822  return;
1823  }
1824 
1825  unsigned char ptr[8];
1826  switch (CPV->getType()->getTypeID()) {
1827 
1828  case Type::IntegerTyID: {
1829  Type *ETy = CPV->getType();
1830  if (ETy == Type::getInt8Ty(CPV->getContext())) {
1831  unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
1832  ConvertIntToBytes<>(ptr, c);
1833  aggBuffer->addBytes(ptr, 1, Bytes);
1834  } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
1835  short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
1836  ConvertIntToBytes<>(ptr, int16);
1837  aggBuffer->addBytes(ptr, 2, Bytes);
1838  } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
1839  if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1840  int int32 = (int)(constInt->getZExtValue());
1841  ConvertIntToBytes<>(ptr, int32);
1842  aggBuffer->addBytes(ptr, 4, Bytes);
1843  break;
1844  } else if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1845  if (const auto *constInt = dyn_cast_or_null<ConstantInt>(
1846  ConstantFoldConstant(Cexpr, DL))) {
1847  int int32 = (int)(constInt->getZExtValue());
1848  ConvertIntToBytes<>(ptr, int32);
1849  aggBuffer->addBytes(ptr, 4, Bytes);
1850  break;
1851  }
1852  if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1853  Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1854  aggBuffer->addSymbol(v, Cexpr->getOperand(0));
1855  aggBuffer->addZeros(4);
1856  break;
1857  }
1858  }
1859  llvm_unreachable("unsupported integer const type");
1860  } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
1861  if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1862  long long int64 = (long long)(constInt->getZExtValue());
1863  ConvertIntToBytes<>(ptr, int64);
1864  aggBuffer->addBytes(ptr, 8, Bytes);
1865  break;
1866  } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1867  if (const auto *constInt = dyn_cast_or_null<ConstantInt>(
1868  ConstantFoldConstant(Cexpr, DL))) {
1869  long long int64 = (long long)(constInt->getZExtValue());
1870  ConvertIntToBytes<>(ptr, int64);
1871  aggBuffer->addBytes(ptr, 8, Bytes);
1872  break;
1873  }
1874  if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1875  Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1876  aggBuffer->addSymbol(v, Cexpr->getOperand(0));
1877  aggBuffer->addZeros(8);
1878  break;
1879  }
1880  }
1881  llvm_unreachable("unsupported integer const type");
1882  } else
1883  llvm_unreachable("unsupported integer const type");
1884  break;
1885  }
1886  case Type::HalfTyID:
1887  case Type::FloatTyID:
1888  case Type::DoubleTyID: {
1889  const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
1890  Type *Ty = CFP->getType();
1891  if (Ty == Type::getHalfTy(CPV->getContext())) {
1892  APInt API = CFP->getValueAPF().bitcastToAPInt();
1893  uint16_t float16 = API.getLoBits(16).getZExtValue();
1894  ConvertIntToBytes<>(ptr, float16);
1895  aggBuffer->addBytes(ptr, 2, Bytes);
1896  } else if (Ty == Type::getFloatTy(CPV->getContext())) {
1897  float float32 = (float) CFP->getValueAPF().convertToFloat();
1898  ConvertFloatToBytes(ptr, float32);
1899  aggBuffer->addBytes(ptr, 4, Bytes);
1900  } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
1901  double float64 = CFP->getValueAPF().convertToDouble();
1902  ConvertDoubleToBytes(ptr, float64);
1903  aggBuffer->addBytes(ptr, 8, Bytes);
1904  } else {
1905  llvm_unreachable("unsupported fp const type");
1906  }
1907  break;
1908  }
1909  case Type::PointerTyID: {
1910  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1911  aggBuffer->addSymbol(GVar, GVar);
1912  } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1913  const Value *v = Cexpr->stripPointerCasts();
1914  aggBuffer->addSymbol(v, Cexpr);
1915  }
1916  unsigned int s = DL.getTypeAllocSize(CPV->getType());
1917  aggBuffer->addZeros(s);
1918  break;
1919  }
1920 
1921  case Type::ArrayTyID:
1922  case Type::VectorTyID:
1923  case Type::StructTyID: {
1924  if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1925  int ElementSize = DL.getTypeAllocSize(CPV->getType());
1926  bufferAggregateConstant(CPV, aggBuffer);
1927  if (Bytes > ElementSize)
1928  aggBuffer->addZeros(Bytes - ElementSize);
1929  } else if (isa<ConstantAggregateZero>(CPV))
1930  aggBuffer->addZeros(Bytes);
1931  else
1932  llvm_unreachable("Unexpected Constant type");
1933  break;
1934  }
1935 
1936  default:
1937  llvm_unreachable("unsupported type");
1938  }
1939 }
1940 
1941 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1942  AggBuffer *aggBuffer) {
1943  const DataLayout &DL = getDataLayout();
1944  int Bytes;
1945 
1946  // Integers of arbitrary width
1947  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1948  APInt Val = CI->getValue();
1949  for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
1950  uint8_t Byte = Val.getLoBits(8).getZExtValue();
1951  aggBuffer->addBytes(&Byte, 1, 1);
1952  Val.lshrInPlace(8);
1953  }
1954  return;
1955  }
1956 
1957  // Old constants
1958  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1959  if (CPV->getNumOperands())
1960  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1961  bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1962  return;
1963  }
1964 
1965  if (const ConstantDataSequential *CDS =
1966  dyn_cast<ConstantDataSequential>(CPV)) {
1967  if (CDS->getNumElements())
1968  for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1969  bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1970  aggBuffer);
1971  return;
1972  }
1973 
1974  if (isa<ConstantStruct>(CPV)) {
1975  if (CPV->getNumOperands()) {
1976  StructType *ST = cast<StructType>(CPV->getType());
1977  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1978  if (i == (e - 1))
1979  Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1980  DL.getTypeAllocSize(ST) -
1981  DL.getStructLayout(ST)->getElementOffset(i);
1982  else
1983  Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1984  DL.getStructLayout(ST)->getElementOffset(i);
1985  bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1986  }
1987  }
1988  return;
1989  }
1990  llvm_unreachable("unsupported constant type in printAggregateConstant()");
1991 }
1992 
1993 /// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1994 /// a copy from AsmPrinter::lowerConstant, except customized to only handle
1995 /// expressions that are representable in PTX and create
1996 /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1997 const MCExpr *
1998 NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
1999  MCContext &Ctx = OutContext;
2000 
2001  if (CV->isNullValue() || isa<UndefValue>(CV))
2002  return MCConstantExpr::create(0, Ctx);
2003 
2004  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
2005  return MCConstantExpr::create(CI->getZExtValue(), Ctx);
2006 
2007  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
2008  const MCSymbolRefExpr *Expr =
2010  if (ProcessingGeneric) {
2011  return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
2012  } else {
2013  return Expr;
2014  }
2015  }
2016 
2017  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
2018  if (!CE) {
2019  llvm_unreachable("Unknown constant value to lower!");
2020  }
2021 
2022  switch (CE->getOpcode()) {
2023  default:
2024  // If the code isn't optimized, there may be outstanding folding
2025  // opportunities. Attempt to fold the expression using DataLayout as a
2026  // last resort before giving up.
2028  if (C && C != CE)
2029  return lowerConstantForGV(C, ProcessingGeneric);
2030 
2031  // Otherwise report the problem to the user.
2032  {
2033  std::string S;
2034  raw_string_ostream OS(S);
2035  OS << "Unsupported expression in static initializer: ";
2036  CE->printAsOperand(OS, /*PrintType=*/false,
2037  !MF ? nullptr : MF->getFunction().getParent());
2038  report_fatal_error(OS.str());
2039  }
2040 
2041  case Instruction::AddrSpaceCast: {
2042  // Strip the addrspacecast and pass along the operand
2043  PointerType *DstTy = cast<PointerType>(CE->getType());
2044  if (DstTy->getAddressSpace() == 0) {
2045  return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
2046  }
2047  std::string S;
2048  raw_string_ostream OS(S);
2049  OS << "Unsupported expression in static initializer: ";
2050  CE->printAsOperand(OS, /*PrintType=*/ false,
2051  !MF ? nullptr : MF->getFunction().getParent());
2052  report_fatal_error(OS.str());
2053  }
2054 
2055  case Instruction::GetElementPtr: {
2056  const DataLayout &DL = getDataLayout();
2057 
2058  // Generate a symbolic expression for the byte address
2059  APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
2060  cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
2061 
2062  const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
2063  ProcessingGeneric);
2064  if (!OffsetAI)
2065  return Base;
2066 
2067  int64_t Offset = OffsetAI.getSExtValue();
2068  return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx),
2069  Ctx);
2070  }
2071 
2072  case Instruction::Trunc:
2073  // We emit the value and depend on the assembler to truncate the generated
2074  // expression properly. This is important for differences between
2075  // blockaddress labels. Since the two labels are in the same function, it
2076  // is reasonable to treat their delta as a 32-bit value.
2078  case Instruction::BitCast:
2079  return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2080 
2081  case Instruction::IntToPtr: {
2082  const DataLayout &DL = getDataLayout();
2083 
2084  // Handle casts to pointers by changing them into casts to the appropriate
2085  // integer type. This promotes constant folding and simplifies this code.
2086  Constant *Op = CE->getOperand(0);
2088  false/*ZExt*/);
2089  return lowerConstantForGV(Op, ProcessingGeneric);
2090  }
2091 
2092  case Instruction::PtrToInt: {
2093  const DataLayout &DL = getDataLayout();
2094 
2095  // Support only foldable casts to/from pointers that can be eliminated by
2096  // changing the pointer to the appropriately sized integer type.
2097  Constant *Op = CE->getOperand(0);
2098  Type *Ty = CE->getType();
2099 
2100  const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2101 
2102  // We can emit the pointer value into this slot if the slot is an
2103  // integer slot equal to the size of the pointer.
2104  if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2105  return OpExpr;
2106 
2107  // Otherwise the pointer is smaller than the resultant integer, mask off
2108  // the high bits so we are sure to get a proper truncation if the input is
2109  // a constant expr.
2110  unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2111  const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2112  return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2113  }
2114 
2115  // The MC library also has a right-shift operator, but it isn't consistently
2116  // signed or unsigned between different targets.
2117  case Instruction::Add: {
2118  const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2119  const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
2120  switch (CE->getOpcode()) {
2121  default: llvm_unreachable("Unknown binary operator constant cast expr");
2122  case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2123  }
2124  }
2125  }
2126 }
2127 
2128 // Copy of MCExpr::print customized for NVPTX
2129 void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2130  switch (Expr.getKind()) {
2131  case MCExpr::Target:
2132  return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2133  case MCExpr::Constant:
2134  OS << cast<MCConstantExpr>(Expr).getValue();
2135  return;
2136 
2137  case MCExpr::SymbolRef: {
2138  const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2139  const MCSymbol &Sym = SRE.getSymbol();
2140  Sym.print(OS, MAI);
2141  return;
2142  }
2143 
2144  case MCExpr::Unary: {
2145  const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2146  switch (UE.getOpcode()) {
2147  case MCUnaryExpr::LNot: OS << '!'; break;
2148  case MCUnaryExpr::Minus: OS << '-'; break;
2149  case MCUnaryExpr::Not: OS << '~'; break;
2150  case MCUnaryExpr::Plus: OS << '+'; break;
2151  }
2152  printMCExpr(*UE.getSubExpr(), OS);
2153  return;
2154  }
2155 
2156  case MCExpr::Binary: {
2157  const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2158 
2159  // Only print parens around the LHS if it is non-trivial.
2160  if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2161  isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2162  printMCExpr(*BE.getLHS(), OS);
2163  } else {
2164  OS << '(';
2165  printMCExpr(*BE.getLHS(), OS);
2166  OS<< ')';
2167  }
2168 
2169  switch (BE.getOpcode()) {
2170  case MCBinaryExpr::Add:
2171  // Print "X-42" instead of "X+-42".
2172  if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2173  if (RHSC->getValue() < 0) {
2174  OS << RHSC->getValue();
2175  return;
2176  }
2177  }
2178 
2179  OS << '+';
2180  break;
2181  default: llvm_unreachable("Unhandled binary operator");
2182  }
2183 
2184  // Only print parens around the LHS if it is non-trivial.
2185  if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2186  printMCExpr(*BE.getRHS(), OS);
2187  } else {
2188  OS << '(';
2189  printMCExpr(*BE.getRHS(), OS);
2190  OS << ')';
2191  }
2192  return;
2193  }
2194  }
2195 
2196  llvm_unreachable("Invalid expression kind!");
2197 }
2198 
2199 /// PrintAsmOperand - Print out an operand for an inline asm expression.
2200 ///
2201 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2202  unsigned AsmVariant,
2203  const char *ExtraCode, raw_ostream &O) {
2204  if (ExtraCode && ExtraCode[0]) {
2205  if (ExtraCode[1] != 0)
2206  return true; // Unknown modifier.
2207 
2208  switch (ExtraCode[0]) {
2209  default:
2210  // See if this is a generic print operand
2211  return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
2212  case 'r':
2213  break;
2214  }
2215  }
2216 
2217  printOperand(MI, OpNo, O);
2218 
2219  return false;
2220 }
2221 
2222 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
2223  const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
2224  const char *ExtraCode, raw_ostream &O) {
2225  if (ExtraCode && ExtraCode[0])
2226  return true; // Unknown modifier
2227 
2228  O << '[';
2229  printMemOperand(MI, OpNo, O);
2230  O << ']';
2231 
2232  return false;
2233 }
2234 
2235 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
2236  raw_ostream &O, const char *Modifier) {
2237  const MachineOperand &MO = MI->getOperand(opNum);
2238  switch (MO.getType()) {
2241  if (MO.getReg() == NVPTX::VRDepot)
2242  O << DEPOTNAME << getFunctionNumber();
2243  else
2245  } else {
2246  emitVirtualRegister(MO.getReg(), O);
2247  }
2248  return;
2249 
2251  if (!Modifier)
2252  O << MO.getImm();
2253  else if (strstr(Modifier, "vec") == Modifier)
2254  printVecModifiedImmediate(MO, Modifier, O);
2255  else
2257  "Don't know how to handle modifier on immediate operand");
2258  return;
2259 
2261  printFPConstant(MO.getFPImm(), O);
2262  break;
2263 
2265  getSymbol(MO.getGlobal())->print(O, MAI);
2266  break;
2267 
2269  MO.getMBB()->getSymbol()->print(O, MAI);
2270  return;
2271 
2272  default:
2273  llvm_unreachable("Operand type not supported.");
2274  }
2275 }
2276 
2277 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
2278  raw_ostream &O, const char *Modifier) {
2279  printOperand(MI, opNum, O);
2280 
2281  if (Modifier && strcmp(Modifier, "add") == 0) {
2282  O << ", ";
2283  printOperand(MI, opNum + 1, O);
2284  } else {
2285  if (MI->getOperand(opNum + 1).isImm() &&
2286  MI->getOperand(opNum + 1).getImm() == 0)
2287  return; // don't print ',0' or '+0'
2288  O << "+";
2289  printOperand(MI, opNum + 1, O);
2290  }
2291 }
2292 
2293 // Force static initialization.
2297 }
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
uint64_t CallInst * C
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:90
unsigned getAlignment() const
Definition: GlobalObject.h:59
unsigned getPTXVersion() const
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:165
unsigned getOpcode() const
Return the opcode at the root of this constant expression.
Definition: Constants.h:1210
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1563
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
Bitwise negation.
Definition: MCExpr.h:365
MachineBasicBlock * getMBB() const
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:94
bool hasDebugInfo() const
Returns true if valid debug info is present.
MCSymbol * GetExternalSymbolSymbol(StringRef Sym) const
Return the MCSymbol for the specified ExternalSymbol.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:323
bool getAlign(const Function &F, unsigned index, unsigned &align)
bool hasPrivateLinkage() const
Definition: GlobalValue.h:435
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
This class represents lattice values for constants.
Definition: AllocatorList.h:24
bool getMaxNReg(const Function &F, unsigned &x)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:42
static unsigned index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
unsigned getPointerPrefAlignment(unsigned AS=0) const
Return target's alignment for stack-based pointers FIXME: The defaults need to be removed once all of...
Definition: DataLayout.cpp:620
2: 32-bit floating point type
Definition: Type.h:59
bool doFinalization(Module &M) override
Shut down the asmprinter.
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:89
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:137
Implements a dense probed hash-table based set.
Definition: DenseSet.h:250
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
Definition: DataLayout.cpp:588
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:164
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
bool isTexture(const Value &val)
unsigned Reg
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:423
Opcode getOpcode() const
Get the kind of this unary expression.
Definition: MCExpr.h:404
float convertToFloat() const
Definition: APFloat.h:1098
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition: Module.h:402
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:564
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:705
Unary plus.
Definition: MCExpr.h:366
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:510
MachineBasicBlock reference.
13: Structures
Definition: Type.h:73
unsigned const TargetRegisterInfo * TRI
Metadata node.
Definition: Metadata.h:864
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:466
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:141
F(f)
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:97
1: 16-bit floating point type
Definition: Type.h:58
const GlobalListType & getGlobalList() const
Get the Module's list of global variables (constant).
Definition: Module.h:521
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
15: Pointers
Definition: Type.h:75
static IntegerType * getInt16Ty(LLVMContext &C)
Definition: Type.cpp:175
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:128
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:49
bool hasImageHandles() const
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static void DiscoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable *> &Globals)
DiscoverDependentGlobals - Return a set of GlobalVariables on which V depends.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not a register based.
static Constant * getIntegerCast(Constant *C, Type *Ty, bool isSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:1613
amdgpu Simplify well known AMD library false Value Value const Twine & Name
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:516
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:116
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:164
unsigned getPointerTypeSizeInBits(Type *) const
Layout pointer size, in bits, based on the type.
Definition: DataLayout.cpp:646
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:138
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
static bool usedInOneFunc(const User *U, Function const *&oneFunc)
const ConstantFP * getFPImm() const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:412
Class to represent struct types.
Definition: DerivedTypes.h:201
void clearAnnotationCache(const Module *Mod)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:36
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
Name of external global symbol.
This file contains the simple types necessary to represent the attributes associated with functions a...
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:166
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:54
const char * getSymbolName() const
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:978
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
This file implements a class to represent arbitrary precision integral constant values and operations...
bool hasCommonLinkage() const
Definition: GlobalValue.h:440
Target & getTheNVPTXTarget64()
unsigned getNumRegClasses() const
bool getMaxNTIDz(const Function &F, unsigned &z)
unsigned getSizeInBits() const
Context object for machine code objects.
Definition: MCContext.h:63
bool hasExternalLinkage() const
Definition: GlobalValue.h:422
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Attempt to fold the constant using the specified DataLayout.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:267
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:85
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:889
Class to represent function types.
Definition: DerivedTypes.h:103
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1575
bool isKernelFunction(const Function &F)
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:406
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool isSurface(const Value &val)
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AsmPrinter.h:296
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4444
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:574
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Definition: MCExpr.h:567
#define T
Unary assembler expressions.
Definition: MCExpr.h:360
Class to represent array types.
Definition: DerivedTypes.h:369
bool getMaxNTIDx(const Function &F, unsigned &x)
std::string getTextureName(const Value &val)
ManagedStringPool * getManagedStrPool() const
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:212
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:224
RegisterAsmPrinter - Helper template for registering a target specific assembly printer, for use in the target machine initialization function.
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:123
#define DEPOTNAME
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:461
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:426
bool isSampler(const Value &val)
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:84
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Unary expressions.
Definition: MCExpr.h:42
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition: AsmPrinter.h:100
Value * getOperand(unsigned i) const
Definition: User.h:170
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:161
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:55
Class to represent pointers.
Definition: DerivedTypes.h:467
bool hasAppendingLinkage() const
Definition: GlobalValue.h:433
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:176
11: Arbitrary bit width integers
Definition: Type.h:71
Target & getTheNVPTXTarget32()
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:750
0: type with no size
Definition: Type.h:57
Address of a global value.
static bool isEmptyXXStructor(GlobalVariable *GV)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:149
void print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens=false) const
Definition: MCExpr.cpp:42
unsigned const MachineRegisterInfo * MRI
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:113
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Machine Value Type.
bool hasName() const
Definition: Value.h:251
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:85
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const char * getImageHandleSymbol(unsigned Idx) const
Returns the symbol name at the given index.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
static bool is64Bit(const char *name)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:224
static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, HexagonAsmPrinter &Printer, bool MustExtend)
const GlobalValue * getGlobal() const
const NVPTXTargetLowering * getTargetLowering() const override
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
size_t alias_size() const
Definition: Module.h:621
double convertToDouble() const
Definition: APFloat.h:1097
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:82
std::string getTargetName() const
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:740
This file declares a class to represent arbitrary precision floating point values and provide a varie...
unsigned getMaxAlignment() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool hasInternalLinkage() const
Definition: GlobalValue.h:434
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:144
static const char * getRegisterName(unsigned RegNo)
This class describes a target machine that is implemented with the LLVM target-independent code gener...
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:495
NVPTX::DrvInterface getDrvInterface() const
unsigned size() const
Definition: DenseMap.h:126
Logical negation.
Definition: MCExpr.h:363
Extended Value Type.
Definition: ValueTypes.h:34
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:529
Binary assembler expressions.
Definition: MCExpr.h:417
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
size_t size() const
Definition: SmallVector.h:53
std::string getVirtualRegisterName(unsigned) const
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:4225
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:499
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool getMaxNTIDy(const Function &F, unsigned &y)
bool getReqNTIDx(const Function &F, unsigned &x)
unsigned first
bool hasWeakLinkage() const
Definition: GlobalValue.h:430
const APFloat & getValueAPF() const
Definition: Constants.h:303
bool isImage(const Value &val)
14: Arrays
Definition: Type.h:74
bool getReqNTIDz(const Function &F, unsigned &z)
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:208
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:163
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:120
static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, const MachineFunction *MF, const Module *M, const MachineFrameInfo *MFI, const TargetInstrInfo *TII, LLVMContext &Ctx)
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:192
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
void setOpcode(unsigned Op)
Definition: MCInst.h:173
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1167
static void ConvertFloatToBytes(unsigned char *p, float val)
std::string * getManagedString(const char *S)
16: SIMD 'packed' format, or other vector type
Definition: Type.h:76
static void printMCExpr(const MCExpr *E, raw_ostream &OS)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:430
const MCSymbol & getSymbol() const
Definition: MCExpr.h:336
ExprKind getKind() const
Definition: MCExpr.h:73
virtual const MCExpr * lowerConstant(const Constant *CV)
Lower the specified LLVM Constant to an MCExpr.
MachineOperand class - Representation of each machine instruction operand.
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:82
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
Module.h This file contains the declarations for the Module class.
bool isVector(MCInstrInfo const &MCII, MCInst const &MCI)
AddressSpace
Definition: NVPTXBaseInfo.h:22
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:730
bool isImageReadWrite(const Value &val)
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:258
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:231
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
StringRef str()
Return a StringRef for the vector contents.
Definition: raw_ostream.h:535
std::string getSurfaceName(const Value &val)
const char * getName(unsigned RegNo) const
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable *> &Order, DenseSet< const GlobalVariable *> &Visited, DenseSet< const GlobalVariable *> &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted...
std::string utostr(uint64_t X, bool isNeg=false)
Definition: StringExtras.h:224
int64_t getImm() const
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:194
const Function & getFunction() const
Return the LLVM function that this machine code represents.
std::string getSamplerName(const Value &val)
Class for arbitrary precision integers.
Definition: APInt.h:70
ConstantArray - Constant Array Declarations.
Definition: Constants.h:414
bool isManaged(const Value &val)
bool getReqNTIDy(const Function &F, unsigned &y)
iterator_range< user_iterator > users()
Definition: Value.h:400
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:896
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:436
Representation of each machine instruction.
Definition: MachineInstr.h:64
pointer remove(iterator &IT)
Definition: ilist.h:251
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
static bool printOperand(raw_ostream &OS, const SelectionDAG *G, const SDValue Value)
unsigned getParamAlignment(unsigned ArgNo) const
Return the alignment for the specified function parameter.
NVPTXTargetMachine.
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: AsmPrinter.cpp:248
iterator insert(iterator where, pointer New)
Definition: ilist.h:228
iterator end()
Definition: Module.h:597
const Function * getParent() const
Definition: Argument.h:42
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC)
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:551
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:123
virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB) const
Targets can override this to emit stuff at the start of a basic block.
static bool usedInGlobalVarDef(const Constant *C)
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition: Format.h:199
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
#define I(x, y, z)
Definition: MD5.cpp:58
std::string getNVPTXRegClassName(TargetRegisterClass const *RC)
iterator_range< debug_compile_units_iterator > debug_compile_units() const
Return an iterator for all DICompileUnits listed in this Module's llvm.dbg.cu named metadata node and...
Definition: Module.h:778
iterator begin()
Definition: Module.h:595
iterator end()
Definition: DenseMap.h:109
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:124
References to labels and assigned expressions.
Definition: MCExpr.h:41
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
Type * getValueType() const
Definition: GlobalValue.h:276
Unary minus.
Definition: MCExpr.h:364
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:92
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:44
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f)
const std::string & getModuleInlineAsm() const
Get any module-scope inline assembly blocks.
Definition: Module.h:248
Opcode getOpcode() const
Get the kind of this binary expression.
Definition: MCExpr.h:561
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:206
3: 64-bit floating point type
Definition: Type.h:60
Implements NVPTX-specific streamer.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but may be faster. ...
static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool useFuncSeen(const Constant *C, DenseMap< const Function *, bool > &seenMap)
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:483
const MCExpr * getSubExpr() const
Get the child of this unary expression.
Definition: MCExpr.h:407
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:250
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
LLVM Value Representation.
Definition: Value.h:73
bool isLoopHeader(const MachineBasicBlock *BB) const
True if the block is a loop header node.
static const char * name
Floating-point immediate operand.
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:419
bool hasInitializer() const
Definitions have initializers, declarations don't.
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
Constant expressions.
Definition: MCExpr.h:40
Binary expressions.
Definition: MCExpr.h:39
unsigned int getSmVersion() const
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
uint64_t getTypeAllocSizeInBits(Type *Ty) const
Returns the offset in bits between successive objects of the specified type, including alignment padding...
Definition: DataLayout.h:446
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:216
static void ConvertDoubleToBytes(unsigned char *p, double val)
iterator_range< global_iterator > globals()
Definition: Module.h:584
IRTranslator LLVM IR MI
void addOperand(const MCOperand &Op)
Definition: MCInst.h:186
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
APInt bitcastToAPInt() const
Definition: APFloat.h:1094
bool getMinCTASm(const Function &F, unsigned &x)
Target specific expression.
Definition: MCExpr.h:43
static void ConvertIntToBytes(unsigned char *p, T val)
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:35
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects...
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:123
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:174
bool use_empty() const
Definition: Value.h:323
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx)
Definition: MCExpr.cpp:164
bool isImageWriteOnly(const Value &val)
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:274
void LLVMInitializeNVPTXAsmPrinter()
bool doFinalization(Module &M) override
Shut down the asmprinter.
This file describes how to lower LLVM code to machine code.
const BasicBlock * getParent() const
Definition: Instruction.h:67
const NVPTXRegisterInfo * getRegisterInfo() const override
void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:60