LLVM  8.0.1
Lint.cpp
Go to the documentation of this file.
1 //===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass statically checks for common and easily-identified constructs
11 // which produce undefined or likely unintended behavior in LLVM IR.
12 //
13 // It is not a guarantee of correctness, in two ways. First, it isn't
14 // comprehensive. There are checks which could be done statically which are
15 // not yet implemented. Some of these are indicated by TODO comments, but
16 // those aren't comprehensive either. Second, many conditions cannot be
17 // checked statically. This pass does no dynamic instrumentation, so it
18 // can't check for all possible problems.
19 //
20 // Another limitation is that it assumes all code will be executed. A store
21 // through a null pointer in a basic block which is never reached is harmless,
22 // but this pass will warn about it anyway. This is the main reason why most
23 // of these checks live here instead of in the Verifier pass.
24 //
25 // Optimization passes may make conditions that this pass checks for more or
26 // less obvious. If an optimization pass appears to be introducing a warning,
27 // it may be that the optimization pass is merely exposing an existing
28 // condition in the code.
29 //
30 // This code may be run before instcombine. In many cases, instcombine checks
31 // for the same kinds of things and turns instructions with undefined behavior
32 // into unreachable (or equivalent). Because of this, this pass makes some
33 // effort to look through bitcasts and so on.
34 //
35 //===----------------------------------------------------------------------===//
36 
37 #include "llvm/Analysis/Lint.h"
38 #include "llvm/ADT/APInt.h"
39 #include "llvm/ADT/ArrayRef.h"
40 #include "llvm/ADT/SmallPtrSet.h"
41 #include "llvm/ADT/Twine.h"
46 #include "llvm/Analysis/Loads.h"
48 #include "llvm/Analysis/Passes.h"
51 #include "llvm/IR/Argument.h"
52 #include "llvm/IR/BasicBlock.h"
53 #include "llvm/IR/CallSite.h"
54 #include "llvm/IR/Constant.h"
55 #include "llvm/IR/Constants.h"
56 #include "llvm/IR/DataLayout.h"
57 #include "llvm/IR/DerivedTypes.h"
58 #include "llvm/IR/Dominators.h"
59 #include "llvm/IR/Function.h"
60 #include "llvm/IR/GlobalVariable.h"
61 #include "llvm/IR/InstVisitor.h"
62 #include "llvm/IR/InstrTypes.h"
63 #include "llvm/IR/Instruction.h"
64 #include "llvm/IR/Instructions.h"
65 #include "llvm/IR/IntrinsicInst.h"
67 #include "llvm/IR/Module.h"
68 #include "llvm/IR/Type.h"
69 #include "llvm/IR/Value.h"
70 #include "llvm/Pass.h"
71 #include "llvm/Support/Casting.h"
72 #include "llvm/Support/Debug.h"
73 #include "llvm/Support/KnownBits.h"
76 #include <cassert>
77 #include <cstdint>
78 #include <iterator>
79 #include <string>
80 
81 using namespace llvm;
82 
83 namespace {
84  namespace MemRef {
85  static const unsigned Read = 1;
86  static const unsigned Write = 2;
87  static const unsigned Callee = 4;
88  static const unsigned Branchee = 8;
89  } // end namespace MemRef
90 
91  class Lint : public FunctionPass, public InstVisitor<Lint> {
92  friend class InstVisitor<Lint>;
93 
94  void visitFunction(Function &F);
95 
96  void visitCallSite(CallSite CS);
97  void visitMemoryReference(Instruction &I, Value *Ptr,
98  uint64_t Size, unsigned Align,
99  Type *Ty, unsigned Flags);
100  void visitEHBeginCatch(IntrinsicInst *II);
101  void visitEHEndCatch(IntrinsicInst *II);
102 
103  void visitCallInst(CallInst &I);
104  void visitInvokeInst(InvokeInst &I);
105  void visitReturnInst(ReturnInst &I);
106  void visitLoadInst(LoadInst &I);
107  void visitStoreInst(StoreInst &I);
108  void visitXor(BinaryOperator &I);
109  void visitSub(BinaryOperator &I);
110  void visitLShr(BinaryOperator &I);
111  void visitAShr(BinaryOperator &I);
112  void visitShl(BinaryOperator &I);
113  void visitSDiv(BinaryOperator &I);
114  void visitUDiv(BinaryOperator &I);
115  void visitSRem(BinaryOperator &I);
116  void visitURem(BinaryOperator &I);
117  void visitAllocaInst(AllocaInst &I);
118  void visitVAArgInst(VAArgInst &I);
119  void visitIndirectBrInst(IndirectBrInst &I);
120  void visitExtractElementInst(ExtractElementInst &I);
121  void visitInsertElementInst(InsertElementInst &I);
122  void visitUnreachableInst(UnreachableInst &I);
123 
124  Value *findValue(Value *V, bool OffsetOk) const;
125  Value *findValueImpl(Value *V, bool OffsetOk,
126  SmallPtrSetImpl<Value *> &Visited) const;
127 
128  public:
129  Module *Mod;
130  const DataLayout *DL;
131  AliasAnalysis *AA;
132  AssumptionCache *AC;
133  DominatorTree *DT;
134  TargetLibraryInfo *TLI;
135 
136  std::string Messages;
137  raw_string_ostream MessagesStr;
138 
139  static char ID; // Pass identification, replacement for typeid
140  Lint() : FunctionPass(ID), MessagesStr(Messages) {
142  }
143 
144  bool runOnFunction(Function &F) override;
145 
146  void getAnalysisUsage(AnalysisUsage &AU) const override {
147  AU.setPreservesAll();
152  }
153  void print(raw_ostream &O, const Module *M) const override {}
154 
155  void WriteValues(ArrayRef<const Value *> Vs) {
156  for (const Value *V : Vs) {
157  if (!V)
158  continue;
159  if (isa<Instruction>(V)) {
160  MessagesStr << *V << '\n';
161  } else {
162  V->printAsOperand(MessagesStr, true, Mod);
163  MessagesStr << '\n';
164  }
165  }
166  }
167 
168  /// A check failed, so printout out the condition and the message.
169  ///
170  /// This provides a nice place to put a breakpoint if you want to see why
171  /// something is not correct.
172  void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; }
173 
174  /// A check failed (with values to print).
175  ///
176  /// This calls the Message-only version so that the above is easier to set
177  /// a breakpoint on.
178  template <typename T1, typename... Ts>
179  void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) {
180  CheckFailed(Message);
181  WriteValues({V1, Vs...});
182  }
183  };
184 } // end anonymous namespace
185 
186 char Lint::ID = 0;
187 INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
188  false, true)
193 INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
194  false, true)
195 
196 // Assert - We know that cond should be true, if not print an error message.
197 #define Assert(C, ...) \
198  do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (false)
199 
200 // Lint::run - This is the main Analysis entry point for a
201 // function.
202 //
204  Mod = F.getParent();
205  DL = &F.getParent()->getDataLayout();
206  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
207  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
208  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
209  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
210  visit(F);
211  dbgs() << MessagesStr.str();
212  Messages.clear();
213  return false;
214 }
215 
216 void Lint::visitFunction(Function &F) {
217  // This isn't undefined behavior, it's just a little unusual, and it's a
218  // fairly common mistake to neglect to name a function.
219  Assert(F.hasName() || F.hasLocalLinkage(),
220  "Unusual: Unnamed function with non-local linkage", &F);
221 
222  // TODO: Check for irreducible control flow.
223 }
224 
225 void Lint::visitCallSite(CallSite CS) {
226  Instruction &I = *CS.getInstruction();
227  Value *Callee = CS.getCalledValue();
228 
229  visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr,
230  MemRef::Callee);
231 
232  if (Function *F = dyn_cast<Function>(findValue(Callee,
233  /*OffsetOk=*/false))) {
234  Assert(CS.getCallingConv() == F->getCallingConv(),
235  "Undefined behavior: Caller and callee calling convention differ",
236  &I);
237 
238  FunctionType *FT = F->getFunctionType();
239  unsigned NumActualArgs = CS.arg_size();
240 
241  Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
242  : FT->getNumParams() == NumActualArgs,
243  "Undefined behavior: Call argument count mismatches callee "
244  "argument count",
245  &I);
246 
247  Assert(FT->getReturnType() == I.getType(),
248  "Undefined behavior: Call return type mismatches "
249  "callee return type",
250  &I);
251 
252  // Check argument types (in case the callee was casted) and attributes.
253  // TODO: Verify that caller and callee attributes are compatible.
254  Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
255  CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
256  for (; AI != AE; ++AI) {
257  Value *Actual = *AI;
258  if (PI != PE) {
259  Argument *Formal = &*PI++;
260  Assert(Formal->getType() == Actual->getType(),
261  "Undefined behavior: Call argument type mismatches "
262  "callee parameter type",
263  &I);
264 
265  // Check that noalias arguments don't alias other arguments. This is
266  // not fully precise because we don't know the sizes of the dereferenced
267  // memory regions.
268  if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) {
269  AttributeList PAL = CS.getAttributes();
270  unsigned ArgNo = 0;
271  for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) {
272  // Skip ByVal arguments since they will be memcpy'd to the callee's
273  // stack so we're not really passing the pointer anyway.
274  if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal))
275  continue;
276  if (AI != BI && (*BI)->getType()->isPointerTy()) {
277  AliasResult Result = AA->alias(*AI, *BI);
278  Assert(Result != MustAlias && Result != PartialAlias,
279  "Unusual: noalias argument aliases another argument", &I);
280  }
281  }
282  }
283 
284  // Check that an sret argument points to valid memory.
285  if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
286  Type *Ty =
287  cast<PointerType>(Formal->getType())->getElementType();
288  visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty),
289  DL->getABITypeAlignment(Ty), Ty,
290  MemRef::Read | MemRef::Write);
291  }
292  }
293  }
294  }
295 
296  if (CS.isCall()) {
297  const CallInst *CI = cast<CallInst>(CS.getInstruction());
298  if (CI->isTailCall()) {
299  const AttributeList &PAL = CI->getAttributes();
300  unsigned ArgNo = 0;
301  for (Value *Arg : CS.args()) {
302  // Skip ByVal arguments since they will be memcpy'd to the callee's
303  // stack anyway.
304  if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal))
305  continue;
306  Value *Obj = findValue(Arg, /*OffsetOk=*/true);
307  Assert(!isa<AllocaInst>(Obj),
308  "Undefined behavior: Call with \"tail\" keyword references "
309  "alloca",
310  &I);
311  }
312  }
313  }
314 
315 
316  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
317  switch (II->getIntrinsicID()) {
318  default: break;
319 
320  // TODO: Check more intrinsics
321 
322  case Intrinsic::memcpy: {
323  MemCpyInst *MCI = cast<MemCpyInst>(&I);
324  // TODO: If the size is known, use it.
325  visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize,
326  MCI->getDestAlignment(), nullptr, MemRef::Write);
327  visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize,
328  MCI->getSourceAlignment(), nullptr, MemRef::Read);
329 
330  // Check that the memcpy arguments don't overlap. The AliasAnalysis API
331  // isn't expressive enough for what we really want to do. Known partial
332  // overlap is not distinguished from the case where nothing is known.
333  auto Size = LocationSize::unknown();
334  if (const ConstantInt *Len =
335  dyn_cast<ConstantInt>(findValue(MCI->getLength(),
336  /*OffsetOk=*/false)))
337  if (Len->getValue().isIntN(32))
338  Size = LocationSize::precise(Len->getValue().getZExtValue());
339  Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
340  MustAlias,
341  "Undefined behavior: memcpy source and destination overlap", &I);
342  break;
343  }
344  case Intrinsic::memmove: {
345  MemMoveInst *MMI = cast<MemMoveInst>(&I);
346  // TODO: If the size is known, use it.
347  visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize,
348  MMI->getDestAlignment(), nullptr, MemRef::Write);
349  visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize,
350  MMI->getSourceAlignment(), nullptr, MemRef::Read);
351  break;
352  }
353  case Intrinsic::memset: {
354  MemSetInst *MSI = cast<MemSetInst>(&I);
355  // TODO: If the size is known, use it.
356  visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize,
357  MSI->getDestAlignment(), nullptr, MemRef::Write);
358  break;
359  }
360 
361  case Intrinsic::vastart:
363  "Undefined behavior: va_start called in a non-varargs function",
364  &I);
365 
366  visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
367  nullptr, MemRef::Read | MemRef::Write);
368  break;
369  case Intrinsic::vacopy:
370  visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
371  nullptr, MemRef::Write);
372  visitMemoryReference(I, CS.getArgument(1), MemoryLocation::UnknownSize, 0,
373  nullptr, MemRef::Read);
374  break;
375  case Intrinsic::vaend:
376  visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
377  nullptr, MemRef::Read | MemRef::Write);
378  break;
379 
381  // Stackrestore doesn't read or write memory, but it sets the
382  // stack pointer, which the compiler may read from or write to
383  // at any time, so check it for both readability and writeability.
384  visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
385  nullptr, MemRef::Read | MemRef::Write);
386  break;
387  }
388 }
389 
390 void Lint::visitCallInst(CallInst &I) {
391  return visitCallSite(&I);
392 }
393 
394 void Lint::visitInvokeInst(InvokeInst &I) {
395  return visitCallSite(&I);
396 }
397 
398 void Lint::visitReturnInst(ReturnInst &I) {
399  Function *F = I.getParent()->getParent();
400  Assert(!F->doesNotReturn(),
401  "Unusual: Return statement in function with noreturn attribute", &I);
402 
403  if (Value *V = I.getReturnValue()) {
404  Value *Obj = findValue(V, /*OffsetOk=*/true);
405  Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
406  }
407 }
408 
409 // TODO: Check that the reference is in bounds.
410 // TODO: Check readnone/readonly function attributes.
411 void Lint::visitMemoryReference(Instruction &I,
412  Value *Ptr, uint64_t Size, unsigned Align,
413  Type *Ty, unsigned Flags) {
414  // If no memory is being referenced, it doesn't matter if the pointer
415  // is valid.
416  if (Size == 0)
417  return;
418 
419  Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
420  Assert(!isa<ConstantPointerNull>(UnderlyingObject),
421  "Undefined behavior: Null pointer dereference", &I);
422  Assert(!isa<UndefValue>(UnderlyingObject),
423  "Undefined behavior: Undef pointer dereference", &I);
424  Assert(!isa<ConstantInt>(UnderlyingObject) ||
425  !cast<ConstantInt>(UnderlyingObject)->isMinusOne(),
426  "Unusual: All-ones pointer dereference", &I);
427  Assert(!isa<ConstantInt>(UnderlyingObject) ||
428  !cast<ConstantInt>(UnderlyingObject)->isOne(),
429  "Unusual: Address one pointer dereference", &I);
430 
431  if (Flags & MemRef::Write) {
432  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
433  Assert(!GV->isConstant(), "Undefined behavior: Write to read-only memory",
434  &I);
435  Assert(!isa<Function>(UnderlyingObject) &&
436  !isa<BlockAddress>(UnderlyingObject),
437  "Undefined behavior: Write to text section", &I);
438  }
439  if (Flags & MemRef::Read) {
440  Assert(!isa<Function>(UnderlyingObject), "Unusual: Load from function body",
441  &I);
442  Assert(!isa<BlockAddress>(UnderlyingObject),
443  "Undefined behavior: Load from block address", &I);
444  }
445  if (Flags & MemRef::Callee) {
446  Assert(!isa<BlockAddress>(UnderlyingObject),
447  "Undefined behavior: Call to block address", &I);
448  }
449  if (Flags & MemRef::Branchee) {
450  Assert(!isa<Constant>(UnderlyingObject) ||
451  isa<BlockAddress>(UnderlyingObject),
452  "Undefined behavior: Branch to non-blockaddress", &I);
453  }
454 
455  // Check for buffer overflows and misalignment.
456  // Only handles memory references that read/write something simple like an
457  // alloca instruction or a global variable.
458  int64_t Offset = 0;
459  if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *DL)) {
460  // OK, so the access is to a constant offset from Ptr. Check that Ptr is
461  // something we can handle and if so extract the size of this base object
462  // along with its alignment.
463  uint64_t BaseSize = MemoryLocation::UnknownSize;
464  unsigned BaseAlign = 0;
465 
466  if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
467  Type *ATy = AI->getAllocatedType();
468  if (!AI->isArrayAllocation() && ATy->isSized())
469  BaseSize = DL->getTypeAllocSize(ATy);
470  BaseAlign = AI->getAlignment();
471  if (BaseAlign == 0 && ATy->isSized())
472  BaseAlign = DL->getABITypeAlignment(ATy);
473  } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
474  // If the global may be defined differently in another compilation unit
475  // then don't warn about funky memory accesses.
476  if (GV->hasDefinitiveInitializer()) {
477  Type *GTy = GV->getValueType();
478  if (GTy->isSized())
479  BaseSize = DL->getTypeAllocSize(GTy);
480  BaseAlign = GV->getAlignment();
481  if (BaseAlign == 0 && GTy->isSized())
482  BaseAlign = DL->getABITypeAlignment(GTy);
483  }
484  }
485 
486  // Accesses from before the start or after the end of the object are not
487  // defined.
489  BaseSize == MemoryLocation::UnknownSize ||
490  (Offset >= 0 && Offset + Size <= BaseSize),
491  "Undefined behavior: Buffer overflow", &I);
492 
493  // Accesses that say that the memory is more aligned than it is are not
494  // defined.
495  if (Align == 0 && Ty && Ty->isSized())
496  Align = DL->getABITypeAlignment(Ty);
497  Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
498  "Undefined behavior: Memory reference address is misaligned", &I);
499  }
500 }
501 
502 void Lint::visitLoadInst(LoadInst &I) {
503  visitMemoryReference(I, I.getPointerOperand(),
504  DL->getTypeStoreSize(I.getType()), I.getAlignment(),
505  I.getType(), MemRef::Read);
506 }
507 
508 void Lint::visitStoreInst(StoreInst &I) {
509  visitMemoryReference(I, I.getPointerOperand(),
510  DL->getTypeStoreSize(I.getOperand(0)->getType()),
511  I.getAlignment(),
512  I.getOperand(0)->getType(), MemRef::Write);
513 }
514 
515 void Lint::visitXor(BinaryOperator &I) {
516  Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
517  "Undefined result: xor(undef, undef)", &I);
518 }
519 
520 void Lint::visitSub(BinaryOperator &I) {
521  Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
522  "Undefined result: sub(undef, undef)", &I);
523 }
524 
525 void Lint::visitLShr(BinaryOperator &I) {
526  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1),
527  /*OffsetOk=*/false)))
528  Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
529  "Undefined result: Shift count out of range", &I);
530 }
531 
532 void Lint::visitAShr(BinaryOperator &I) {
533  if (ConstantInt *CI =
534  dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
535  Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
536  "Undefined result: Shift count out of range", &I);
537 }
538 
539 void Lint::visitShl(BinaryOperator &I) {
540  if (ConstantInt *CI =
541  dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
542  Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
543  "Undefined result: Shift count out of range", &I);
544 }
545 
546 static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
547  AssumptionCache *AC) {
548  // Assume undef could be zero.
549  if (isa<UndefValue>(V))
550  return true;
551 
552  VectorType *VecTy = dyn_cast<VectorType>(V->getType());
553  if (!VecTy) {
554  KnownBits Known = computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT);
555  return Known.isZero();
556  }
557 
558  // Per-component check doesn't work with zeroinitializer
559  Constant *C = dyn_cast<Constant>(V);
560  if (!C)
561  return false;
562 
563  if (C->isZeroValue())
564  return true;
565 
566  // For a vector, KnownZero will only be true if all values are zero, so check
567  // this per component
568  for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
569  Constant *Elem = C->getAggregateElement(I);
570  if (isa<UndefValue>(Elem))
571  return true;
572 
573  KnownBits Known = computeKnownBits(Elem, DL);
574  if (Known.isZero())
575  return true;
576  }
577 
578  return false;
579 }
580 
581 void Lint::visitSDiv(BinaryOperator &I) {
582  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
583  "Undefined behavior: Division by zero", &I);
584 }
585 
586 void Lint::visitUDiv(BinaryOperator &I) {
587  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
588  "Undefined behavior: Division by zero", &I);
589 }
590 
591 void Lint::visitSRem(BinaryOperator &I) {
592  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
593  "Undefined behavior: Division by zero", &I);
594 }
595 
596 void Lint::visitURem(BinaryOperator &I) {
597  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
598  "Undefined behavior: Division by zero", &I);
599 }
600 
601 void Lint::visitAllocaInst(AllocaInst &I) {
602  if (isa<ConstantInt>(I.getArraySize()))
603  // This isn't undefined behavior, it's just an obvious pessimization.
605  "Pessimization: Static alloca outside of entry block", &I);
606 
607  // TODO: Check for an unusual size (MSB set?)
608 }
609 
610 void Lint::visitVAArgInst(VAArgInst &I) {
611  visitMemoryReference(I, I.getOperand(0), MemoryLocation::UnknownSize, 0,
612  nullptr, MemRef::Read | MemRef::Write);
613 }
614 
615 void Lint::visitIndirectBrInst(IndirectBrInst &I) {
616  visitMemoryReference(I, I.getAddress(), MemoryLocation::UnknownSize, 0,
617  nullptr, MemRef::Branchee);
618 
619  Assert(I.getNumDestinations() != 0,
620  "Undefined behavior: indirectbr with no destinations", &I);
621 }
622 
623 void Lint::visitExtractElementInst(ExtractElementInst &I) {
624  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
625  /*OffsetOk=*/false)))
626  Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
627  "Undefined result: extractelement index out of range", &I);
628 }
629 
630 void Lint::visitInsertElementInst(InsertElementInst &I) {
631  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2),
632  /*OffsetOk=*/false)))
633  Assert(CI->getValue().ult(I.getType()->getNumElements()),
634  "Undefined result: insertelement index out of range", &I);
635 }
636 
637 void Lint::visitUnreachableInst(UnreachableInst &I) {
638  // This isn't undefined behavior, it's merely suspicious.
639  Assert(&I == &I.getParent()->front() ||
640  std::prev(I.getIterator())->mayHaveSideEffects(),
641  "Unusual: unreachable immediately preceded by instruction without "
642  "side effects",
643  &I);
644 }
645 
646 /// findValue - Look through bitcasts and simple memory reference patterns
647 /// to identify an equivalent, but more informative, value. If OffsetOk
648 /// is true, look through getelementptrs with non-zero offsets too.
649 ///
650 /// Most analysis passes don't require this logic, because instcombine
651 /// will simplify most of these kinds of things away. But it's a goal of
652 /// this Lint pass to be useful even on non-optimized IR.
653 Value *Lint::findValue(Value *V, bool OffsetOk) const {
654  SmallPtrSet<Value *, 4> Visited;
655  return findValueImpl(V, OffsetOk, Visited);
656 }
657 
658 /// findValueImpl - Implementation helper for findValue.
659 Value *Lint::findValueImpl(Value *V, bool OffsetOk,
660  SmallPtrSetImpl<Value *> &Visited) const {
661  // Detect self-referential values.
662  if (!Visited.insert(V).second)
663  return UndefValue::get(V->getType());
664 
665  // TODO: Look through sext or zext cast, when the result is known to
666  // be interpreted as signed or unsigned, respectively.
667  // TODO: Look through eliminable cast pairs.
668  // TODO: Look through calls with unique return values.
669  // TODO: Look through vector insert/extract/shuffle.
670  V = OffsetOk ? GetUnderlyingObject(V, *DL) : V->stripPointerCasts();
671  if (LoadInst *L = dyn_cast<LoadInst>(V)) {
672  BasicBlock::iterator BBI = L->getIterator();
673  BasicBlock *BB = L->getParent();
674  SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
675  for (;;) {
676  if (!VisitedBlocks.insert(BB).second)
677  break;
678  if (Value *U =
680  return findValueImpl(U, OffsetOk, Visited);
681  if (BBI != BB->begin()) break;
682  BB = BB->getUniquePredecessor();
683  if (!BB) break;
684  BBI = BB->end();
685  }
686  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
687  if (Value *W = PN->hasConstantValue())
688  if (W != V)
689  return findValueImpl(W, OffsetOk, Visited);
690  } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
691  if (CI->isNoopCast(*DL))
692  return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
693  } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
694  if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
695  Ex->getIndices()))
696  if (W != V)
697  return findValueImpl(W, OffsetOk, Visited);
698  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
699  // Same as above, but for ConstantExpr instead of Instruction.
700  if (Instruction::isCast(CE->getOpcode())) {
701  if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
702  CE->getOperand(0)->getType(), CE->getType(),
703  *DL))
704  return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
705  } else if (CE->getOpcode() == Instruction::ExtractValue) {
706  ArrayRef<unsigned> Indices = CE->getIndices();
707  if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
708  if (W != V)
709  return findValueImpl(W, OffsetOk, Visited);
710  }
711  }
712 
713  // As a last resort, try SimplifyInstruction or constant folding.
714  if (Instruction *Inst = dyn_cast<Instruction>(V)) {
715  if (Value *W = SimplifyInstruction(Inst, {*DL, TLI, DT, AC}))
716  return findValueImpl(W, OffsetOk, Visited);
717  } else if (auto *C = dyn_cast<Constant>(V)) {
718  if (Value *W = ConstantFoldConstant(C, *DL, TLI))
719  if (W && W != V)
720  return findValueImpl(W, OffsetOk, Visited);
721  }
722 
723  return V;
724 }
725 
726 //===----------------------------------------------------------------------===//
727 // Implement the public interfaces to this file...
728 //===----------------------------------------------------------------------===//
729 
731  return new Lint();
732 }
733 
734 /// lintFunction - Check a function for errors, printing messages on stderr.
735 ///
736 void llvm::lintFunction(const Function &f) {
737  Function &F = const_cast<Function&>(f);
738  assert(!F.isDeclaration() && "Cannot lint external functions");
739 
741  Lint *V = new Lint();
742  FPM.add(V);
743  FPM.run(F);
744 }
745 
746 /// lintModule - Check a module for errors, printing messages on stderr.
747 ///
748 void llvm::lintModule(const Module &M) {
750  Lint *V = new Lint();
751  PM.add(V);
752  PM.run(const_cast<Module&>(M));
753 }
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:177
uint64_t CallInst * C
Return a value (possibly void), from a function.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
User::op_iterator arg_iterator
The type of iterator to use when looping over actual arguments at this call site. ...
Definition: CallSite.h:213
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
void lintModule(const Module &M)
Check a module.
Definition: Lint.cpp:748
bool hasLocalLinkage() const
Definition: GlobalValue.h:436
This instruction extracts a struct member or array element value from an aggregate value...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
Base class for instruction visitors.
Definition: InstVisitor.h:81
unsigned arg_size() const
Definition: CallSite.h:219
CallingConv::ID getCallingConv() const
Get the calling convention of the call.
Definition: CallSite.h:312
This class represents lattice values for constants.
Definition: AllocatorList.h:24
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
bool isSized(SmallPtrSetImpl< Type *> *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:265
static constexpr LocationSize unknown()
This class represents a function call, abstracting a target machine&#39;s calling convention.
cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
void lintFunction(const Function &F)
lintFunction - Check a function for errors, printing messages on stderr.
Definition: Lint.cpp:736
An immutable pass that tracks lazily created AssumptionCache objects.
unsigned getSourceAlignment() const
iterator_range< IterTy > args() const
Definition: CallSite.h:215
A cache of @llvm.assume calls within a function.
static LocationSize precise(uint64_t Value)
This class wraps the llvm.memset intrinsic.
arg_iterator arg_end()
Definition: Function.h:680
F(f)
block Block Frequency true
An instruction for reading from memory.
Definition: Instructions.h:168
Value * getLength() const
#define Assert(C,...)
Definition: Lint.cpp:197
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:269
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
void add(Pass *P) override
Add a pass to the queue of passes to run.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:371
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
Implicit null checks
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:353
This class wraps the llvm.memmove intrinsic.
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, AliasAnalysis *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:321
IterTy arg_end() const
Definition: CallSite.h:575
InstrTy * getInstruction() const
Definition: CallSite.h:92
unsigned getDestAlignment() const
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
This file implements a class to represent arbitrary precision integral constant values and operations...
ValTy * getCalledValue() const
Return the pointer to function that is being called.
Definition: CallSite.h:100
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Attempt to fold the constant using the specified DataLayout.
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:889
Class to represent function types.
Definition: DerivedTypes.h:103
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Definition: Lint.cpp:84
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:247
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset...
lint
Definition: Lint.cpp:193
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
An instruction for storing to memory.
Definition: Instructions.h:321
VectorType * getType() const
Overload to return most specific vector type.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
amdgpu Simplify well known AMD library false Value * Callee
Value * getOperand(unsigned i) const
Definition: User.h:170
bool hasNoAliasAttr() const
Return true if this argument has the noalias attribute.
Definition: Function.cpp:134
bool isCall() const
Return true if a CallInst is enclosed.
Definition: CallSite.h:87
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:335
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:78
bool isZeroValue() const
Return true if the value is negative zero or null value.
Definition: Constants.cpp:65
PassManager manages ModulePassManagers.
const BasicBlock & getEntryBlock() const
Definition: Function.h:640
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:610
static bool runOnFunction(Function &F, bool PostInlining)
This instruction inserts a single (scalar) element into a VectorType value.
bool hasName() const
Definition: Value.h:251
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
static bool isNoopCast(Instruction::CastOps Opcode, Type *SrcTy, Type *DstTy, const DataLayout &DL)
A no-op cast is one that can be effected without changing any bits.
This function has undefined behavior.
This is an important base class in LLVM.
Definition: Constant.h:42
bool hasStructRetAttr() const
Return true if this argument has the sret attribute.
Definition: Function.cpp:144
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:224
const Instruction & front() const
Definition: BasicBlock.h:281
Indirect Branch Instruction.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
bool doesNotReturn() const
Determine if the function cannot return.
Definition: Function.h:509
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
Value * getPointerOperand()
Definition: Instructions.h:285
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
arg_iterator arg_begin()
Definition: Function.h:671
self_iterator getIterator()
Definition: ilist_node.h:82
VectorType * getVectorOperandType() const
bool isZero() const
Returns true if value is all zero.
Definition: KnownBits.h:72
FunctionPassManager manages FunctionPasses and BasicBlockPassManagers.
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1415
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:529
This class represents the va_arg llvm instruction, which returns an argument of the specified type gi...
bool isCast() const
Definition: Instruction.h:134
const Value * getArraySize() const
Get the number of elements allocated.
Definition: Instructions.h:93
bool run(Module &M)
run - Execute all of the passes scheduled for execution.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:4225
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", false, true) INITIALIZE_PASS_END(Lint
Value * FindInsertedValue(Value *V, ArrayRef< unsigned > idx_range, Instruction *InsertBefore=nullptr)
Given an aggregrate and an sequence of indices, see if the scalar value indexed is already around as ...
FunctionPass * createLintPass()
Create a lint pass.
Definition: Lint.cpp:730
The two locations precisely alias each other.
Definition: AliasAnalysis.h:90
Iterator for intrusive lists based on ilist_node.
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
ValTy * getArgument(unsigned ArgNo) const
Definition: CallSite.h:186
iterator end()
Definition: BasicBlock.h:271
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:213
IterTy arg_begin() const
Definition: CallSite.h:571
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
void initializeLintPass(PassRegistry &)
This class wraps the llvm.memcpy intrinsic.
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1244
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
The access may modify the value stored in memory.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:164
Class to represent vector types.
Definition: DerivedTypes.h:393
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:56
void setPreservesAll()
Set by analyses that do not transform their input at all.
bool isTailCall() const
amdgpu Simplify well known AMD library false Value Value * Arg
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:546
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:241
This file provides utility analysis objects describing memory locations.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
This instruction extracts a single (scalar) element from a VectorType value.
uint32_t Size
Definition: Profile.cpp:47
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:366
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:206
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:483
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
LLVM Value Representation.
Definition: Value.h:73
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
Invoke instruction.
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it...
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:260
AttributeList getAttributes() const
Get the parameter attributes of the call.
Definition: CallSite.h:329
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
The two locations alias, but only due to a partial overlap.
Definition: AliasAnalysis.h:88
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Value * getPointerOperand()
Definition: Instructions.h:413
Statically lint checks LLVM IR
Definition: Lint.cpp:193
#define T1
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:67
an instruction to allocate memory on the stack
Definition: Instructions.h:60