LLVM 8.0.1
AtomicExpandPass.cpp
1 //===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains a pass (at IR level) to replace atomic instructions with
11 // __atomic_* library calls, or target-specific instructions which implement the
12 // same semantics in a way which better fits the target backend. This can
13 // include the use of (intrinsic-based) load-linked/store-conditional loops,
14 // AtomicCmpXchg, or type coercions.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/CodeGen/AtomicExpandUtils.h"
22 #include "llvm/CodeGen/RuntimeLibcalls.h"
23 #include "llvm/CodeGen/TargetLowering.h"
24 #include "llvm/CodeGen/TargetPassConfig.h"
25 #include "llvm/CodeGen/TargetSubtargetInfo.h"
26 #include "llvm/CodeGen/ValueTypes.h"
27 #include "llvm/IR/Attributes.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/Constant.h"
30 #include "llvm/IR/Constants.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/DerivedTypes.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/IRBuilder.h"
35 #include "llvm/IR/InstIterator.h"
36 #include "llvm/IR/Instruction.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/Module.h"
39 #include "llvm/IR/Type.h"
40 #include "llvm/IR/User.h"
41 #include "llvm/IR/Value.h"
42 #include "llvm/Pass.h"
43 #include "llvm/Support/AtomicOrdering.h"
44 #include "llvm/Support/Casting.h"
45 #include "llvm/Support/Debug.h"
46 #include "llvm/Support/ErrorHandling.h"
47 #include "llvm/Support/raw_ostream.h"
48 #include "llvm/Target/TargetMachine.h"
49 #include <cassert>
50 #include <cstdint>
51 #include <iterator>
52 
53 using namespace llvm;
54 
55 #define DEBUG_TYPE "atomic-expand"
56 
57 namespace {
58 
59  class AtomicExpand: public FunctionPass {
60  const TargetLowering *TLI = nullptr;
61 
62  public:
63  static char ID; // Pass identification, replacement for typeid
64 
65  AtomicExpand() : FunctionPass(ID) {
66  initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
67  }
68 
69  bool runOnFunction(Function &F) override;
70 
71  private:
72  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
73  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
74  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
75  bool tryExpandAtomicLoad(LoadInst *LI);
76  bool expandAtomicLoadToLL(LoadInst *LI);
77  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
78  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
79  bool expandAtomicStore(StoreInst *SI);
80  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
81  Value *
82  insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
83  AtomicOrdering MemOpOrder,
84  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
85  void expandAtomicOpToLLSC(
86  Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder,
87  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
88  void expandPartwordAtomicRMW(
89  AtomicRMWInst *I,
90  TargetLoweringBase::AtomicExpansionKind ExpansionKind);
91  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
92  void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
93  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
94  void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
95 
96  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
97  static Value *insertRMWCmpXchgLoop(
98  IRBuilder<> &Builder, Type *ResultType, Value *Addr,
99  AtomicOrdering MemOpOrder,
100  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
101  CreateCmpXchgInstFun CreateCmpXchg);
102  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
103 
104  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
105  bool isIdempotentRMW(AtomicRMWInst *RMWI);
106  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
107 
108  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,
109  Value *PointerOperand, Value *ValueOperand,
110  Value *CASExpected, AtomicOrdering Ordering,
111  AtomicOrdering Ordering2,
112  ArrayRef<RTLIB::Libcall> Libcalls);
113  void expandAtomicLoadToLibcall(LoadInst *LI);
114  void expandAtomicStoreToLibcall(StoreInst *LI);
115  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
116  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
117 
118  friend bool
119  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
120  CreateCmpXchgInstFun CreateCmpXchg);
121  };
122 
123 } // end anonymous namespace
124 
125 char AtomicExpand::ID = 0;
126 
127 char &llvm::AtomicExpandID = AtomicExpand::ID;
128 
129 INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
130  false, false)
131 
132 FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
133 
134 // Helper functions to retrieve the size of atomic instructions.
135 static unsigned getAtomicOpSize(LoadInst *LI) {
136  const DataLayout &DL = LI->getModule()->getDataLayout();
137  return DL.getTypeStoreSize(LI->getType());
138 }
139 
140 static unsigned getAtomicOpSize(StoreInst *SI) {
141  const DataLayout &DL = SI->getModule()->getDataLayout();
142  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
143 }
144 
145 static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
146  const DataLayout &DL = RMWI->getModule()->getDataLayout();
147  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
148 }
149 
150 static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
151  const DataLayout &DL = CASI->getModule()->getDataLayout();
152  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
153 }
154 
155 // Helper functions to retrieve the alignment of atomic instructions.
156 static unsigned getAtomicOpAlign(LoadInst *LI) {
157  unsigned Align = LI->getAlignment();
158  // In the future, if this IR restriction is relaxed, we should
159  // return DataLayout::getABITypeAlignment when there's no align
160  // value.
161  assert(Align != 0 && "An atomic LoadInst always has an explicit alignment");
162  return Align;
163 }
164 
165 static unsigned getAtomicOpAlign(StoreInst *SI) {
166  unsigned Align = SI->getAlignment();
167  // In the future, if this IR restriction is relaxed, we should
168  // return DataLayout::getABITypeAlignment when there's no align
169  // value.
170  assert(Align != 0 && "An atomic StoreInst always has an explicit alignment");
171  return Align;
172 }
173 
174 static unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
175  // TODO(PR27168): This instruction has no alignment attribute, but unlike the
176  // default alignment for load/store, the default here is to assume
177  // it has NATURAL alignment, not DataLayout-specified alignment.
178  const DataLayout &DL = RMWI->getModule()->getDataLayout();
179  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
180 }
181 
182 static unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
183  // TODO(PR27168): same comment as above.
184  const DataLayout &DL = CASI->getModule()->getDataLayout();
185  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
186 }
187 
188 // Determine if a particular atomic operation has a supported size,
189 // and is of appropriate alignment, to be passed through for target
190 // lowering. (Versus turning into a __atomic libcall)
191 template <typename Inst>
192 static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
193  unsigned Size = getAtomicOpSize(I);
194  unsigned Align = getAtomicOpAlign(I);
195  return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
196 }
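// Illustrative note on atomicSizeSupported() above (hypothetical numbers, not
// from the original source): on a target whose getMaxAtomicSizeInBitsSupported()
// is 64, a naturally aligned 8-byte atomic load passes through to target
// lowering, while a 16-byte cmpxchg or an under-aligned access falls back to
// the __atomic_* libcall expansion driven by runOnFunction() below.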
197 
198 bool AtomicExpand::runOnFunction(Function &F) {
199  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
200  if (!TPC)
201  return false;
202 
203  auto &TM = TPC->getTM<TargetMachine>();
204  if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
205  return false;
206  TLI = TM.getSubtargetImpl(F)->getTargetLowering();
207 
208  SmallVector<Instruction *, 1> AtomicInsts;
209 
210  // Changing control-flow while iterating through it is a bad idea, so gather a
211  // list of all atomic instructions before we start.
212  for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
213  Instruction *I = &*II;
214  if (I->isAtomic() && !isa<FenceInst>(I))
215  AtomicInsts.push_back(I);
216  }
217 
218  bool MadeChange = false;
219  for (auto I : AtomicInsts) {
220  auto LI = dyn_cast<LoadInst>(I);
221  auto SI = dyn_cast<StoreInst>(I);
222  auto RMWI = dyn_cast<AtomicRMWInst>(I);
223  auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
224  assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
225 
226  // If the Size/Alignment is not supported, replace with a libcall.
227  if (LI) {
228  if (!atomicSizeSupported(TLI, LI)) {
229  expandAtomicLoadToLibcall(LI);
230  MadeChange = true;
231  continue;
232  }
233  } else if (SI) {
234  if (!atomicSizeSupported(TLI, SI)) {
235  expandAtomicStoreToLibcall(SI);
236  MadeChange = true;
237  continue;
238  }
239  } else if (RMWI) {
240  if (!atomicSizeSupported(TLI, RMWI)) {
241  expandAtomicRMWToLibcall(RMWI);
242  MadeChange = true;
243  continue;
244  }
245  } else if (CASI) {
246  if (!atomicSizeSupported(TLI, CASI)) {
247  expandAtomicCASToLibcall(CASI);
248  MadeChange = true;
249  continue;
250  }
251  }
252 
253  if (TLI->shouldInsertFencesForAtomic(I)) {
254  auto FenceOrdering = AtomicOrdering::Monotonic;
255  if (LI && isAcquireOrStronger(LI->getOrdering())) {
256  FenceOrdering = LI->getOrdering();
257  LI->setOrdering(AtomicOrdering::Monotonic);
258  } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
259  FenceOrdering = SI->getOrdering();
260  SI->setOrdering(AtomicOrdering::Monotonic);
261  } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
262  isAcquireOrStronger(RMWI->getOrdering()))) {
263  FenceOrdering = RMWI->getOrdering();
264  RMWI->setOrdering(AtomicOrdering::Monotonic);
265  } else if (CASI &&
266  TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
267  TargetLoweringBase::AtomicExpansionKind::LLSC &&
268  (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
269  isAcquireOrStronger(CASI->getSuccessOrdering()))) {
270  // If a compare and swap is lowered to LL/SC, we can do smarter fence
271  // insertion, with a stronger one on the success path than on the
272  // failure path. As a result, fence insertion is directly done by
273  // expandAtomicCmpXchg in that case.
274  FenceOrdering = CASI->getSuccessOrdering();
275  CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
276  CASI->setFailureOrdering(AtomicOrdering::Monotonic);
277  }
278 
279  if (FenceOrdering != AtomicOrdering::Monotonic) {
280  MadeChange |= bracketInstWithFences(I, FenceOrdering);
281  }
282  }
283 
284  if (LI) {
285  if (LI->getType()->isFloatingPointTy()) {
286  // TODO: add a TLI hook to control this so that each target can
287  // convert to lowering the original type one at a time.
288  LI = convertAtomicLoadToIntegerType(LI);
289  assert(LI->getType()->isIntegerTy() && "invariant broken");
290  MadeChange = true;
291  }
292 
293  MadeChange |= tryExpandAtomicLoad(LI);
294  } else if (SI) {
295  if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
296  // TODO: add a TLI hook to control this so that each target can
297  // convert to lowering the original type one at a time.
298  SI = convertAtomicStoreToIntegerType(SI);
299  assert(SI->getValueOperand()->getType()->isIntegerTy() &&
300  "invariant broken");
301  MadeChange = true;
302  }
303 
304  if (TLI->shouldExpandAtomicStoreInIR(SI))
305  MadeChange |= expandAtomicStore(SI);
306  } else if (RMWI) {
307  // There are two different ways of expanding RMW instructions:
308  // - into a load if it is idempotent
309  // - into a Cmpxchg/LL-SC loop otherwise
310  // we try them in that order.
311 
312  if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
313  MadeChange = true;
314  } else {
315  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
316  unsigned ValueSize = getAtomicOpSize(RMWI);
317  AtomicRMWInst::BinOp Op = RMWI->getOperation();
318  if (ValueSize < MinCASSize &&
319  (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
320  Op == AtomicRMWInst::And)) {
321  RMWI = widenPartwordAtomicRMW(RMWI);
322  MadeChange = true;
323  }
324 
325  MadeChange |= tryExpandAtomicRMW(RMWI);
326  }
327  } else if (CASI) {
328  // TODO: when we're ready to make the change at the IR level, we can
329  // extend convertCmpXchgToInteger for floating point too.
330  assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
331  "unimplemented - floating point not legal at IR level");
332  if (CASI->getCompareOperand()->getType()->isPointerTy() ) {
333  // TODO: add a TLI hook to control this so that each target can
334  // convert to lowering the original type one at a time.
335  CASI = convertCmpXchgToIntegerType(CASI);
336  assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
337  "invariant broken");
338  MadeChange = true;
339  }
340 
341  MadeChange |= tryExpandAtomicCmpXchg(CASI);
342  }
343  }
344  return MadeChange;
345 }
346 
347 bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
348  IRBuilder<> Builder(I);
349 
350  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
351 
352  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
353  // We have a guard here because not every atomic operation generates a
354  // trailing fence.
355  if (TrailingFence)
356  TrailingFence->moveAfter(I);
357 
358  return (LeadingFence || TrailingFence);
359 }
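// Rough sketch of the effect of bracketInstWithFences() above (the exact
// fences are target-dependent): runOnFunction() first downgrades, say, a
// seq_cst operation to monotonic and then calls this helper, so the result
// looks roughly like
//   fence?  ;  <monotonic atomic op>  ;  fence?
// where either fence may be omitted by the emitLeading/TrailingFence hooks.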
360 
361 /// Get the iX type with the same bitwidth as T.
362 IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
363  const DataLayout &DL) {
364  EVT VT = TLI->getValueType(DL, T);
365  unsigned BitWidth = VT.getStoreSizeInBits();
366  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
367  return IntegerType::get(T->getContext(), BitWidth);
368 }
369 
370 /// Convert an atomic load of a non-integral type to an integer load of the
371 /// equivalent bitwidth. See the function comment on
372 /// convertAtomicStoreToIntegerType for background.
373 LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
374  auto *M = LI->getModule();
375  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
376  M->getDataLayout());
377 
378  IRBuilder<> Builder(LI);
379 
380  Value *Addr = LI->getPointerOperand();
381  Type *PT = PointerType::get(NewTy,
382  Addr->getType()->getPointerAddressSpace());
383  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
384 
385  auto *NewLI = Builder.CreateLoad(NewAddr);
386  NewLI->setAlignment(LI->getAlignment());
387  NewLI->setVolatile(LI->isVolatile());
388  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
389  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
390 
391  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
392  LI->replaceAllUsesWith(NewVal);
393  LI->eraseFromParent();
394  return NewLI;
395 }
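// Hypothetical before/after for convertAtomicLoadToIntegerType() above:
//   %v = load atomic float, float* %p seq_cst, align 4
// becomes
//   %cast = bitcast float* %p to i32*
//   %l    = load atomic i32, i32* %cast seq_cst, align 4
//   %v    = bitcast i32 %l to float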
396 
397 bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
398  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
399  case TargetLoweringBase::AtomicExpansionKind::None:
400  return false;
401  case TargetLoweringBase::AtomicExpansionKind::LLSC:
402  expandAtomicOpToLLSC(
403  LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(),
404  [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
405  return true;
406  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
407  return expandAtomicLoadToLL(LI);
408  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
409  return expandAtomicLoadToCmpXchg(LI);
410  default:
411  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
412  }
413 }
414 
415 bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
416  IRBuilder<> Builder(LI);
417 
418  // On some architectures, load-linked instructions are atomic for larger
419  // sizes than normal loads. For example, the only 64-bit load guaranteed
420  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
421  Value *Val =
422  TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
423  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
424 
425  LI->replaceAllUsesWith(Val);
426  LI->eraseFromParent();
427 
428  return true;
429 }
430 
431 bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
432  IRBuilder<> Builder(LI);
433  AtomicOrdering Order = LI->getOrdering();
434  Value *Addr = LI->getPointerOperand();
435  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
436  Constant *DummyVal = Constant::getNullValue(Ty);
437 
438  Value *Pair = Builder.CreateAtomicCmpXchg(
439  Addr, DummyVal, DummyVal, Order,
440  AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
441  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
442 
443  LI->replaceAllUsesWith(Loaded);
444  LI->eraseFromParent();
445 
446  return true;
447 }
448 
449 /// Convert an atomic store of a non-integral type to an integer store of the
450 /// equivalent bitwidth. We used to not support floating point or vector
451 /// atomics in the IR at all. The backends learned to deal with the bitcast
452 /// idiom because that was the only way of expressing the notion of an atomic
453 /// float or vector store. The long term plan is to teach each backend to
454 /// instruction select from the original atomic store, but as a migration
455 /// mechanism, we convert back to the old format which the backends understand.
456 /// Each backend will need individual work to recognize the new format.
457 StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
458  IRBuilder<> Builder(SI);
459  auto *M = SI->getModule();
460  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
461  M->getDataLayout());
462  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
463 
464  Value *Addr = SI->getPointerOperand();
465  Type *PT = PointerType::get(NewTy,
466  Addr->getType()->getPointerAddressSpace());
467  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
468 
469  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
470  NewSI->setAlignment(SI->getAlignment());
471  NewSI->setVolatile(SI->isVolatile());
472  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
473  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
474  SI->eraseFromParent();
475  return NewSI;
476 }
477 
478 bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
479  // This function is only called on atomic stores that are too large to be
480  // atomic if implemented as a native store. So we replace them with an
481  // atomic swap, which can be implemented for example as a ldrex/strex on ARM
482  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
483  // It is the responsibility of the target to only signal expansion via
484  // shouldExpandAtomicRMW in cases where this is required and possible.
485  IRBuilder<> Builder(SI);
486  AtomicRMWInst *AI =
487  Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
488  SI->getValueOperand(), SI->getOrdering());
489  SI->eraseFromParent();
490 
491  // Now we have an appropriate swap instruction, lower it as usual.
492  return tryExpandAtomicRMW(AI);
493 }
494 
495 static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
496  Value *Loaded, Value *NewVal,
497  AtomicOrdering MemOpOrder,
498  Value *&Success, Value *&NewLoaded) {
499  Value* Pair = Builder.CreateAtomicCmpXchg(
500  Addr, Loaded, NewVal, MemOpOrder,
501  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
502  Success = Builder.CreateExtractValue(Pair, 1, "success");
503  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
504 }
505 
506 /// Emit IR to implement the given atomicrmw operation on values in registers,
507 /// returning the new value.
508 static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
509  Value *Loaded, Value *Inc) {
510  Value *NewVal;
511  switch (Op) {
512  case AtomicRMWInst::Xchg:
513  return Inc;
514  case AtomicRMWInst::Add:
515  return Builder.CreateAdd(Loaded, Inc, "new");
516  case AtomicRMWInst::Sub:
517  return Builder.CreateSub(Loaded, Inc, "new");
518  case AtomicRMWInst::And:
519  return Builder.CreateAnd(Loaded, Inc, "new");
520  case AtomicRMWInst::Nand:
521  return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
522  case AtomicRMWInst::Or:
523  return Builder.CreateOr(Loaded, Inc, "new");
524  case AtomicRMWInst::Xor:
525  return Builder.CreateXor(Loaded, Inc, "new");
526  case AtomicRMWInst::Max:
527  NewVal = Builder.CreateICmpSGT(Loaded, Inc);
528  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
529  case AtomicRMWInst::Min:
530  NewVal = Builder.CreateICmpSLE(Loaded, Inc);
531  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
532  case AtomicRMWInst::UMax:
533  NewVal = Builder.CreateICmpUGT(Loaded, Inc);
534  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
535  case AtomicRMWInst::UMin:
536  NewVal = Builder.CreateICmpULE(Loaded, Inc);
537  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
538  default:
539  llvm_unreachable("Unknown atomic op");
540  }
541 }
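// For example (illustrative), performAtomicOp() turns an 'atomicrmw max' into
// a compare-and-select on the loaded value:
//   %cmp = icmp sgt iN %loaded, %inc
//   %new = select i1 %cmp, iN %loaded, iN %inc
// and the surrounding cmpxchg/LL-SC loop then attempts to store %new back.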
542 
543 bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
544  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
545  case TargetLoweringBase::AtomicExpansionKind::None:
546  return false;
547  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
548  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
549  unsigned ValueSize = getAtomicOpSize(AI);
550  if (ValueSize < MinCASSize) {
551  llvm_unreachable(
552  "MinCmpXchgSizeInBits not yet supported for LL/SC architectures.");
553  } else {
554  auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
555  return performAtomicOp(AI->getOperation(), Builder, Loaded,
556  AI->getValOperand());
557  };
558  expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
559  AI->getOrdering(), PerformOp);
560  }
561  return true;
562  }
563  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
564  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
565  unsigned ValueSize = getAtomicOpSize(AI);
566  if (ValueSize < MinCASSize) {
567  expandPartwordAtomicRMW(AI,
568  TargetLoweringBase::AtomicExpansionKind::CmpXChg);
569  } else {
570  expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
571  }
572  return true;
573  }
574  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
575  expandAtomicRMWToMaskedIntrinsic(AI);
576  return true;
577  }
578  default:
579  llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
580  }
581 }
582 
583 namespace {
584 
585 /// Result values from createMaskInstrs helper.
586 struct PartwordMaskValues {
587  Type *WordType;
588  Type *ValueType;
589  Value *AlignedAddr;
590  Value *ShiftAmt;
591  Value *Mask;
592  Value *Inv_Mask;
593 };
594 
595 } // end anonymous namespace
596 
597 /// This is a helper function which builds instructions to provide
598 /// values necessary for partword atomic operations. It takes an
599 /// incoming address, Addr, and ValueType, and constructs the address,
600 /// shift-amounts and masks needed to work with a larger value of size
601 /// WordSize.
602 ///
603 /// AlignedAddr: Addr rounded down to a multiple of WordSize
604 ///
605 /// ShiftAmt: Number of bits to right-shift a WordSize value loaded
606 /// from AlignedAddr for it to have the same value as if
607 /// ValueType was loaded from Addr.
608 ///
609 /// Mask: Value to mask with the value loaded from AlignedAddr to
610 /// include only the part that would've been loaded from Addr.
611 ///
612 /// Inv_Mask: The inverse of Mask.
613 static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
614  Type *ValueType, Value *Addr,
615  unsigned WordSize) {
616  PartwordMaskValues Ret;
617 
618  BasicBlock *BB = I->getParent();
619  Function *F = BB->getParent();
620  Module *M = I->getModule();
621 
622  LLVMContext &Ctx = F->getContext();
623  const DataLayout &DL = M->getDataLayout();
624 
625  unsigned ValueSize = DL.getTypeStoreSize(ValueType);
626 
627  assert(ValueSize < WordSize);
628 
629  Ret.ValueType = ValueType;
630  Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);
631 
632  Type *WordPtrType =
633  Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
634 
635  Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
636  Ret.AlignedAddr = Builder.CreateIntToPtr(
637  Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
638  "AlignedAddr");
639 
640  Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
641  if (DL.isLittleEndian()) {
642  // turn bytes into bits
643  Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
644  } else {
645  // turn bytes into bits, and count from the other side.
646  Ret.ShiftAmt =
647  Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
648  }
649 
650  Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
651  Ret.Mask = Builder.CreateShl(
652  ConstantInt::get(Ret.WordType, (1 << ValueSize * 8) - 1), Ret.ShiftAmt,
653  "Mask");
654  Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
655 
656  return Ret;
657 }
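// Worked example for createMaskInstrs() above (hypothetical layout, assuming a
// little-endian target with a 4-byte minimum cmpxchg width): for an i8 located
// at byte offset 2 within its word,
//   AlignedAddr = Addr & ~3,   ShiftAmt = 16,
//   Mask = 0x00FF0000,         Inv_Mask = 0xFF00FFFF.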
658 
659 /// Emit IR to implement a masked version of a given atomicrmw
660 /// operation. (That is, only the bits under the Mask should be
661 /// affected by the operation)
662 static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
663  IRBuilder<> &Builder, Value *Loaded,
664  Value *Shifted_Inc, Value *Inc,
665  const PartwordMaskValues &PMV) {
666  // TODO: update to use
667  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
668  // to merge bits from two values without requiring PMV.Inv_Mask.
669  switch (Op) {
670  case AtomicRMWInst::Xchg: {
671  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
672  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
673  return FinalVal;
674  }
675  case AtomicRMWInst::Or:
676  case AtomicRMWInst::Xor:
677  case AtomicRMWInst::And:
678  llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
679  case AtomicRMWInst::Add:
680  case AtomicRMWInst::Sub:
681  case AtomicRMWInst::Nand: {
682  // The other arithmetic ops need to be masked into place.
683  Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
684  Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
685  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
686  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
687  return FinalVal;
688  }
689  case AtomicRMWInst::Max:
690  case AtomicRMWInst::Min:
691  case AtomicRMWInst::UMax:
692  case AtomicRMWInst::UMin: {
693  // Finally, comparison ops will operate on the full value, so
694  // truncate down to the original size, and expand out again after
695  // doing the operation.
696  Value *Loaded_Shiftdown = Builder.CreateTrunc(
697  Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
698  Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
699  Value *NewVal_Shiftup = Builder.CreateShl(
700  Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
701  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
702  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
703  return FinalVal;
704  }
705  default:
706  llvm_unreachable("Unknown atomic op");
707  }
708 }
709 
710 /// Expand a sub-word atomicrmw operation into an appropriate
711 /// word-sized operation.
712 ///
713 /// It will create an LL/SC or cmpxchg loop, as appropriate, the same
714 /// way as a typical atomicrmw expansion. The only difference here is
715 /// that the operation inside of the loop must operate only upon a
716 /// part of the value.
717 void AtomicExpand::expandPartwordAtomicRMW(
718  AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
719  assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);
720 
721  AtomicOrdering MemOpOrder = AI->getOrdering();
722 
723  IRBuilder<> Builder(AI);
724 
725  PartwordMaskValues PMV =
726  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
727  TLI->getMinCmpXchgSizeInBits() / 8);
728 
729  Value *ValOperand_Shifted =
730  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
731  PMV.ShiftAmt, "ValOperand_Shifted");
732 
733  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
734  return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
735  ValOperand_Shifted, AI->getValOperand(), PMV);
736  };
737 
738  // TODO: When we're ready to support LLSC conversions too, use
739  // insertRMWLLSCLoop here for ExpansionKind==LLSC.
740  Value *OldResult =
741  insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
742  PerformPartwordOp, createCmpXchgInstFun);
743  Value *FinalOldResult = Builder.CreateTrunc(
744  Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
745  AI->replaceAllUsesWith(FinalOldResult);
746  AI->eraseFromParent();
747 }
748 
749 // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
750 AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
751  IRBuilder<> Builder(AI);
752  AtomicRMWInst::BinOp Op = AI->getOperation();
753 
754  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
755  Op == AtomicRMWInst::And) &&
756  "Unable to widen operation");
757 
758  PartwordMaskValues PMV =
759  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
760  TLI->getMinCmpXchgSizeInBits() / 8);
761 
762  Value *ValOperand_Shifted =
763  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
764  PMV.ShiftAmt, "ValOperand_Shifted");
765 
766  Value *NewOperand;
767 
768  if (Op == AtomicRMWInst::And)
769  NewOperand =
770  Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
771  else
772  NewOperand = ValOperand_Shifted;
773 
774  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr,
775  NewOperand, AI->getOrdering());
776 
777  Value *FinalOldResult = Builder.CreateTrunc(
778  Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType);
779  AI->replaceAllUsesWith(FinalOldResult);
780  AI->eraseFromParent();
781  return NewAI;
782 }
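// Illustrative example for widenPartwordAtomicRMW() above: with a 32-bit
// minimum cmpxchg width, 'atomicrmw or i8* %p, i8 1' becomes an i32
// 'atomicrmw or' on the containing word with the operand shifted into place
// (for 'and', Inv_Mask is OR'd into the operand so the untouched bytes are
// ANDed with all-ones and preserved); the old i8 result is recovered by
// shifting the wide result down and truncating.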
783 
784 void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
785  // The basic idea here is that we're expanding a cmpxchg of a
786  // smaller memory size up to a word-sized cmpxchg. To do this, we
787  // need to add a retry-loop for strong cmpxchg, so that
788  // modifications to other parts of the word don't cause a spurious
789  // failure.
790 
791  // This generates code like the following:
792  // [[Setup mask values PMV.*]]
793  // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
794  // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
795  // %InitLoaded = load i32* %addr
796  // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
797  // br partword.cmpxchg.loop
798  // partword.cmpxchg.loop:
799  // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
800  // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
801  // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
802  // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
803  // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
804  // i32 %FullWord_NewVal success_ordering failure_ordering
805  // %OldVal = extractvalue { i32, i1 } %NewCI, 0
806  // %Success = extractvalue { i32, i1 } %NewCI, 1
807  // br i1 %Success, label %partword.cmpxchg.end,
808  // label %partword.cmpxchg.failure
809  // partword.cmpxchg.failure:
810  // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
811  // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
812  // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
813  // label %partword.cmpxchg.end
814  // partword.cmpxchg.end:
815  // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
816  // %FinalOldVal = trunc i32 %tmp1 to i8
817  // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
818  // %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1
819 
820  Value *Addr = CI->getPointerOperand();
821  Value *Cmp = CI->getCompareOperand();
822  Value *NewVal = CI->getNewValOperand();
823 
824  BasicBlock *BB = CI->getParent();
825  Function *F = BB->getParent();
826  IRBuilder<> Builder(CI);
827  LLVMContext &Ctx = Builder.getContext();
828 
829  const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8;
830 
831  BasicBlock *EndBB =
832  BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
833  auto FailureBB =
834  BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
835  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
836 
837  // The split call above "helpfully" added a branch at the end of BB
838  // (to the wrong place).
839  std::prev(BB->end())->eraseFromParent();
840  Builder.SetInsertPoint(BB);
841 
842  PartwordMaskValues PMV = createMaskInstrs(
843  Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize);
844 
845  // Shift the incoming values over, into the right location in the word.
846  Value *NewVal_Shifted =
847  Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
848  Value *Cmp_Shifted =
849  Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
850 
851  // Load the entire current word, and mask into place the expected and new
852  // values
853  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
854  InitLoaded->setVolatile(CI->isVolatile());
855  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
856  Builder.CreateBr(LoopBB);
857 
858  // partword.cmpxchg.loop:
859  Builder.SetInsertPoint(LoopBB);
860  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
861  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
862 
863  // Mask/Or the expected and new values into place in the loaded word.
864  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
865  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
866  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
867  PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(),
868  CI->getFailureOrdering(), CI->getSyncScopeID());
869  NewCI->setVolatile(CI->isVolatile());
870  // When we're building a strong cmpxchg, we need a loop, so you
871  // might think we could use a weak cmpxchg inside. But, using strong
872  // allows the below comparison for ShouldContinue, and we're
873  // expecting the underlying cmpxchg to be a machine instruction,
874  // which is strong anyways.
875  NewCI->setWeak(CI->isWeak());
876 
877  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
878  Value *Success = Builder.CreateExtractValue(NewCI, 1);
879 
880  if (CI->isWeak())
881  Builder.CreateBr(EndBB);
882  else
883  Builder.CreateCondBr(Success, EndBB, FailureBB);
884 
885  // partword.cmpxchg.failure:
886  Builder.SetInsertPoint(FailureBB);
887  // Upon failure, check whether the masked-out part of the loaded value
888  // changed. If it did, retry the cmpxchg with the updated value; if it
889  // did not, the masked-in part must have mismatched, so give up.
890  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
891  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
892  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
893 
894  // Add the second value to the phi from above
895  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
896 
897  // partword.cmpxchg.end:
898  Builder.SetInsertPoint(CI);
899 
900  Value *FinalOldVal = Builder.CreateTrunc(
901  Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
902  Value *Res = UndefValue::get(CI->getType());
903  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
904  Res = Builder.CreateInsertValue(Res, Success, 1);
905 
906  CI->replaceAllUsesWith(Res);
907  CI->eraseFromParent();
908 }
909 
910 void AtomicExpand::expandAtomicOpToLLSC(
911  Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder,
912  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
913  IRBuilder<> Builder(I);
914  Value *Loaded =
915  insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp);
916 
917  I->replaceAllUsesWith(Loaded);
918  I->eraseFromParent();
919 }
920 
921 void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
922  IRBuilder<> Builder(AI);
923 
924  PartwordMaskValues PMV =
925  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
926  TLI->getMinCmpXchgSizeInBits() / 8);
927 
928  // The value operand must be sign-extended for signed min/max so that the
929  // target's signed comparison instructions can be used. Otherwise, just
930  // zero-ext.
931  Instruction::CastOps CastOp = Instruction::ZExt;
932  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
933  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
934  CastOp = Instruction::SExt;
935 
936  Value *ValOperand_Shifted = Builder.CreateShl(
937  Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
938  PMV.ShiftAmt, "ValOperand_Shifted");
939  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
940  Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
941  AI->getOrdering());
942  Value *FinalOldResult = Builder.CreateTrunc(
943  Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
944  AI->replaceAllUsesWith(FinalOldResult);
945  AI->eraseFromParent();
946 }
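// Note (illustrative): this path is for targets that expose masked LL/SC-style
// intrinsics via emitMaskedAtomicRMWIntrinsic (e.g. RISC-V at this LLVM
// version), letting the backend itself emit the part-word retry loop late.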
947 
948 void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
949  IRBuilder<> Builder(CI);
950 
951  PartwordMaskValues PMV = createMaskInstrs(
952  Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
953  TLI->getMinCmpXchgSizeInBits() / 8);
954 
955  Value *CmpVal_Shifted = Builder.CreateShl(
956  Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
957  "CmpVal_Shifted");
958  Value *NewVal_Shifted = Builder.CreateShl(
959  Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
960  "NewVal_Shifted");
961  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
962  Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
963  CI->getSuccessOrdering());
964  Value *FinalOldVal = Builder.CreateTrunc(
965  Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
966 
967  Value *Res = UndefValue::get(CI->getType());
968  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
969  Value *Success = Builder.CreateICmpEQ(
970  CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
971  Res = Builder.CreateInsertValue(Res, Success, 1);
972 
973  CI->replaceAllUsesWith(Res);
974  CI->eraseFromParent();
975 }
976 
977 Value *AtomicExpand::insertRMWLLSCLoop(
978  IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
979  AtomicOrdering MemOpOrder,
980  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
981  LLVMContext &Ctx = Builder.getContext();
982  BasicBlock *BB = Builder.GetInsertBlock();
983  Function *F = BB->getParent();
984 
985  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
986  //
987  // The standard expansion we produce is:
988  // [...]
989  // atomicrmw.start:
990  // %loaded = @load.linked(%addr)
991  // %new = some_op iN %loaded, %incr
992  // %stored = @store_conditional(%new, %addr)
993  // %try_again = icmp i32 ne %stored, 0
994  // br i1 %try_again, label %loop, label %atomicrmw.end
995  // atomicrmw.end:
996  // [...]
997  BasicBlock *ExitBB =
998  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
999  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1000 
1001  // The split call above "helpfully" added a branch at the end of BB (to the
1002  // wrong place).
1003  std::prev(BB->end())->eraseFromParent();
1004  Builder.SetInsertPoint(BB);
1005  Builder.CreateBr(LoopBB);
1006 
1007  // Start the main loop block now that we've taken care of the preliminaries.
1008  Builder.SetInsertPoint(LoopBB);
1009  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1010 
1011  Value *NewVal = PerformOp(Builder, Loaded);
1012 
1013  Value *StoreSuccess =
1014  TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1015  Value *TryAgain = Builder.CreateICmpNE(
1016  StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1017  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1018 
1019  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1020  return Loaded;
1021 }
1022 
1023 /// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1024 /// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1025 /// IR. As a migration step, we convert back to what used to be the standard
1026 /// way to represent a pointer cmpxchg so that we can update backends one by
1027 /// one.
1028 AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1029  auto *M = CI->getModule();
1030  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1031  M->getDataLayout());
1032 
1033  IRBuilder<> Builder(CI);
1034 
1035  Value *Addr = CI->getPointerOperand();
1036  Type *PT = PointerType::get(NewTy,
1037  Addr->getType()->getPointerAddressSpace());
1038  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
1039 
1040  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1041  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1042 
1043 
1044  auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
1045  CI->getSuccessOrdering(),
1046  CI->getFailureOrdering(),
1047  CI->getSyncScopeID());
1048  NewCI->setVolatile(CI->isVolatile());
1049  NewCI->setWeak(CI->isWeak());
1050  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1051 
1052  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1053  Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1054 
1055  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1056 
1057  Value *Res = UndefValue::get(CI->getType());
1058  Res = Builder.CreateInsertValue(Res, OldVal, 0);
1059  Res = Builder.CreateInsertValue(Res, Succ, 1);
1060 
1061  CI->replaceAllUsesWith(Res);
1062  CI->eraseFromParent();
1063  return NewCI;
1064 }
1065 
1066 bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1067  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1068  AtomicOrdering FailureOrder = CI->getFailureOrdering();
1069  Value *Addr = CI->getPointerOperand();
1070  BasicBlock *BB = CI->getParent();
1071  Function *F = BB->getParent();
1072  LLVMContext &Ctx = F->getContext();
1073  // If shouldInsertFencesForAtomic() returns true, then the target does not
1074  // want to deal with memory orders, and emitLeading/TrailingFence should take
1075  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1076  // should preserve the ordering.
1077  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1078  AtomicOrdering MemOpOrder =
1079  ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;
1080 
1081  // In implementations which use a barrier to achieve release semantics, we can
1082  // delay emitting this barrier until we know a store is actually going to be
1083  // attempted. The cost of this delay is that we need 2 copies of the block
1084  // emitting the load-linked, affecting code size.
1085  //
1086  // Ideally, this logic would be unconditional except for the minsize check
1087  // since in other cases the extra blocks naturally collapse down to the
1088  // minimal loop. Unfortunately, this puts too much stress on later
1089  // optimisations so we avoid emitting the extra logic in those cases too.
1090  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1091  SuccessOrder != AtomicOrdering::Monotonic &&
1092  SuccessOrder != AtomicOrdering::Acquire &&
1093  !F->optForMinSize();
1094 
1095  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1096  // do it even on minsize.
1097  bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak();
1098 
1099  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1100  //
1101  // The full expansion we produce is:
1102  // [...]
1103  // cmpxchg.start:
1104  // %unreleasedload = @load.linked(%addr)
1105  // %should_store = icmp eq %unreleasedload, %desired
1106  // br i1 %should_store, label %cmpxchg.fencedstore,
1107  // label %cmpxchg.nostore
1108  // cmpxchg.releasingstore:
1109  // fence?
1110  // br label cmpxchg.trystore
1111  // cmpxchg.trystore:
1112  // %loaded.trystore = phi [%unreleasedload, %releasingstore],
1113  // [%releasedload, %cmpxchg.releasedload]
1114  // %stored = @store_conditional(%new, %addr)
1115  // %success = icmp eq i32 %stored, 0
1116  // br i1 %success, label %cmpxchg.success,
1117  // label %cmpxchg.releasedload/%cmpxchg.failure
1118  // cmpxchg.releasedload:
1119  // %releasedload = @load.linked(%addr)
1120  // %should_store = icmp eq %releasedload, %desired
1121  // br i1 %should_store, label %cmpxchg.trystore,
1122  // label %cmpxchg.failure
1123  // cmpxchg.success:
1124  // fence?
1125  // br label %cmpxchg.end
1126  // cmpxchg.nostore:
1127  // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1128  // [%releasedload,
1129  // %cmpxchg.releasedload/%cmpxchg.trystore]
1130  // @load_linked_fail_balance()?
1131  // br label %cmpxchg.failure
1132  // cmpxchg.failure:
1133  // fence?
1134  // br label %cmpxchg.end
1135  // cmpxchg.end:
1136  // %loaded = phi [%loaded.nostore, %cmpxchg.failure],
1137  // [%loaded.trystore, %cmpxchg.trystore]
1138  // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1139  // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1140  // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1141  // [...]
1142  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1143  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1144  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1145  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1146  auto ReleasedLoadBB =
1147  BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1148  auto TryStoreBB =
1149  BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1150  auto ReleasingStoreBB =
1151  BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1152  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1153 
1154  // This grabs the DebugLoc from CI
1155  IRBuilder<> Builder(CI);
1156 
1157  // The split call above "helpfully" added a branch at the end of BB (to the
1158  // wrong place), but we might want a fence too. It's easiest to just remove
1159  // the branch entirely.
1160  std::prev(BB->end())->eraseFromParent();
1161  Builder.SetInsertPoint(BB);
1162  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1163  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1164  Builder.CreateBr(StartBB);
1165 
1166  // Start the main loop block now that we've taken care of the preliminaries.
1167  Builder.SetInsertPoint(StartBB);
1168  Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1169  Value *ShouldStore = Builder.CreateICmpEQ(
1170  UnreleasedLoad, CI->getCompareOperand(), "should_store");
1171 
1172  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1173  // jump straight past that fence instruction (if it exists).
1174  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1175 
1176  Builder.SetInsertPoint(ReleasingStoreBB);
1177  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1178  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1179  Builder.CreateBr(TryStoreBB);
1180 
1181  Builder.SetInsertPoint(TryStoreBB);
1182  Value *StoreSuccess = TLI->emitStoreConditional(
1183  Builder, CI->getNewValOperand(), Addr, MemOpOrder);
1184  StoreSuccess = Builder.CreateICmpEQ(
1185  StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1186  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1187  Builder.CreateCondBr(StoreSuccess, SuccessBB,
1188  CI->isWeak() ? FailureBB : RetryBB);
1189 
1190  Builder.SetInsertPoint(ReleasedLoadBB);
1191  Value *SecondLoad;
1192  if (HasReleasedLoadBB) {
1193  SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1194  ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
1195  "should_store");
1196 
1197  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1198  // jump straight past that fence instruction (if it exists).
1199  Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1200  } else
1201  Builder.CreateUnreachable();
1202 
1203  // Make sure later instructions don't get reordered with a fence if
1204  // necessary.
1205  Builder.SetInsertPoint(SuccessBB);
1206  if (ShouldInsertFencesForAtomic)
1207  TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1208  Builder.CreateBr(ExitBB);
1209 
1210  Builder.SetInsertPoint(NoStoreBB);
1211  // In the failing case, where we don't execute the store-conditional, the
1212  // target might want to balance out the load-linked with a dedicated
1213  // instruction (e.g., on ARM, clearing the exclusive monitor).
1214  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1215  Builder.CreateBr(FailureBB);
1216 
1217  Builder.SetInsertPoint(FailureBB);
1218  if (ShouldInsertFencesForAtomic)
1219  TLI->emitTrailingFence(Builder, CI, FailureOrder);
1220  Builder.CreateBr(ExitBB);
1221 
1222  // Finally, we have control-flow based knowledge of whether the cmpxchg
1223  // succeeded or not. We expose this to later passes by converting any
1224  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1225  // PHI.
1226  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1227  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
1228  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1229  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1230 
1231  // Setup the builder so we can create any PHIs we need.
1232  Value *Loaded;
1233  if (!HasReleasedLoadBB)
1234  Loaded = UnreleasedLoad;
1235  else {
1236  Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
1237  PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1238  TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1239  TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
1240 
1241  Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
1242  PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1243  NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
1244  NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
1245 
1246  Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
1247  PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1248  ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
1249  ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);
1250 
1251  Loaded = ExitLoaded;
1252  }
1253 
1254  // Look for any users of the cmpxchg that are just comparing the loaded value
1255  // against the desired one, and replace them with the CFG-derived version.
1256  SmallVector<ExtractValueInst *, 2> PrunedInsts;
1257  for (auto User : CI->users()) {
1258  auto EV = dyn_cast<ExtractValueInst>(User);
1259  if (!EV)
1260  continue;
1261 
1262  assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1263  "weird extraction from { iN, i1 }");
1264 
1265  if (EV->getIndices()[0] == 0)
1266  EV->replaceAllUsesWith(Loaded);
1267  else
1268  EV->replaceAllUsesWith(Success);
1269 
1270  PrunedInsts.push_back(EV);
1271  }
1272 
1273  // We can remove the instructions now we're no longer iterating through them.
1274  for (auto EV : PrunedInsts)
1275  EV->eraseFromParent();
1276 
1277  if (!CI->use_empty()) {
1278  // Some use of the full struct return that we don't understand has happened,
1279  // so we've got to reconstruct it properly.
1280  Value *Res;
1281  Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
1282  Res = Builder.CreateInsertValue(Res, Success, 1);
1283 
1284  CI->replaceAllUsesWith(Res);
1285  }
1286 
1287  CI->eraseFromParent();
1288  return true;
1289 }
1290 
1291 bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
1292  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1293  if(!C)
1294  return false;
1295 
1296  AtomicRMWInst::BinOp Op = RMWI->getOperation();
1297  switch(Op) {
1298  case AtomicRMWInst::Add:
1299  case AtomicRMWInst::Sub:
1300  case AtomicRMWInst::Or:
1301  case AtomicRMWInst::Xor:
1302  return C->isZero();
1303  case AtomicRMWInst::And:
1304  return C->isMinusOne();
1305  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1306  default:
1307  return false;
1308  }
1309 }
1310 
1311 bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
1312  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1313  tryExpandAtomicLoad(ResultingLoad);
1314  return true;
1315  }
1316  return false;
1317 }
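// For example (illustrative): 'atomicrmw add i32* %p, i32 0 seq_cst' never
// changes memory, so a target that implements lowerIdempotentRMWIntoFencedLoad
// may rewrite it as an atomic load plus whatever fences it needs; the
// resulting load is then fed back through tryExpandAtomicLoad above.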
1318 
1319 Value *AtomicExpand::insertRMWCmpXchgLoop(
1320  IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
1321  AtomicOrdering MemOpOrder,
1322  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
1323  CreateCmpXchgInstFun CreateCmpXchg) {
1324  LLVMContext &Ctx = Builder.getContext();
1325  BasicBlock *BB = Builder.GetInsertBlock();
1326  Function *F = BB->getParent();
1327 
1328  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1329  //
1330  // The standard expansion we produce is:
1331  // [...]
1332  // %init_loaded = load atomic iN* %addr
1333  // br label %loop
1334  // loop:
1335  // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1336  // %new = some_op iN %loaded, %incr
1337  // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1338  // %new_loaded = extractvalue { iN, i1 } %pair, 0
1339  // %success = extractvalue { iN, i1 } %pair, 1
1340  // br i1 %success, label %atomicrmw.end, label %loop
1341  // atomicrmw.end:
1342  // [...]
1343  BasicBlock *ExitBB =
1344  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1345  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1346 
1347  // The split call above "helpfully" added a branch at the end of BB (to the
1348  // wrong place), but we want a load. It's easiest to just remove
1349  // the branch entirely.
1350  std::prev(BB->end())->eraseFromParent();
1351  Builder.SetInsertPoint(BB);
1352  LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
1353  // Atomics require at least natural alignment.
1354  InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8);
1355  Builder.CreateBr(LoopBB);
1356 
1357  // Start the main loop block now that we've taken care of the preliminaries.
1358  Builder.SetInsertPoint(LoopBB);
1359  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1360  Loaded->addIncoming(InitLoaded, BB);
1361 
1362  Value *NewVal = PerformOp(Builder, Loaded);
1363 
1364  Value *NewLoaded = nullptr;
1365  Value *Success = nullptr;
1366 
1367  CreateCmpXchg(Builder, Addr, Loaded, NewVal,
1368  MemOpOrder == AtomicOrdering::Unordered
1369  ? AtomicOrdering::Monotonic
1370  : MemOpOrder,
1371  Success, NewLoaded);
1372  assert(Success && NewLoaded);
1373 
1374  Loaded->addIncoming(NewLoaded, LoopBB);
1375 
1376  Builder.CreateCondBr(Success, ExitBB, LoopBB);
1377 
1378  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1379  return NewLoaded;
1380 }
1381 
1382 bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1383  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1384  unsigned ValueSize = getAtomicOpSize(CI);
1385 
1386  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1387  default:
1388  llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1389  case TargetLoweringBase::AtomicExpansionKind::None:
1390  if (ValueSize < MinCASSize)
1391  expandPartwordCmpXchg(CI);
1392  return false;
1393  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1394  assert(ValueSize >= MinCASSize &&
1395  "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
1396  return expandAtomicCmpXchg(CI);
1397  }
1398  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1399  expandAtomicCmpXchgToMaskedIntrinsic(CI);
1400  return true;
1401  }
1402 }
1403 
1404 // Note: This function is exposed externally by AtomicExpandUtils.h
1405 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1406  CreateCmpXchgInstFun CreateCmpXchg) {
1407  IRBuilder<> Builder(AI);
1408  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
1409  Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(),
1410  [&](IRBuilder<> &Builder, Value *Loaded) {
1411  return performAtomicOp(AI->getOperation(), Builder, Loaded,
1412  AI->getValOperand());
1413  },
1414  CreateCmpXchg);
1415 
1416  AI->replaceAllUsesWith(Loaded);
1417  AI->eraseFromParent();
1418  return true;
1419 }
1420 
1421 // In order to use one of the sized library calls such as
1422 // __atomic_fetch_add_4, the alignment must be sufficient, the size
1423 // must be one of the potentially-specialized sizes, and the value
1424 // type must actually exist in C on the target (otherwise, the
1425 // function wouldn't actually be defined.)
1426 static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
1427  const DataLayout &DL) {
1428  // TODO: "LargestSize" is an approximation for "largest type that
1429  // you can express in C". It seems to be the case that int128 is
1430  // supported on all 64-bit platforms, otherwise only up to 64-bit
1431  // integers are supported. If we get this wrong, then we'll try to
1432  // call a sized libcall that doesn't actually exist. There should
1433  // really be some more reliable way in LLVM of determining integer
1434  // sizes which are valid in the target's C ABI...
1435  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1436  return Align >= Size &&
1437  (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1438  Size <= LargestSize;
1439 }
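// For example (illustrative): a naturally aligned 4-byte operation can use a
// sized call such as __atomic_fetch_add_4, whereas a 3-byte or under-aligned
// access must go through the generic, pointer-based __atomic_* entry points.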
1440 
1441 void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
1442  static const RTLIB::Libcall Libcalls[6] = {
1443  RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1444  RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1445  unsigned Size = getAtomicOpSize(I);
1446  unsigned Align = getAtomicOpAlign(I);
1447 
1448  bool expanded = expandAtomicOpToLibcall(
1449  I, Size, Align, I->getPointerOperand(), nullptr, nullptr,
1450  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1451  (void)expanded;
1452  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Load");
1453 }
1454 
1455 void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
1456  static const RTLIB::Libcall Libcalls[6] = {
1457  RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1458  RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1459  unsigned Size = getAtomicOpSize(I);
1460  unsigned Align = getAtomicOpAlign(I);
1461 
1462  bool expanded = expandAtomicOpToLibcall(
1463  I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr,
1464  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1465  (void)expanded;
1466  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Store");
1467 }
1468 
1469 void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1470  static const RTLIB::Libcall Libcalls[6] = {
1471  RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1472  RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1473  RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1474  unsigned Size = getAtomicOpSize(I);
1475  unsigned Align = getAtomicOpAlign(I);
1476 
1477  bool expanded = expandAtomicOpToLibcall(
1478  I, Size, Align, I->getPointerOperand(), I->getNewValOperand(),
1479  I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1480  Libcalls);
1481  (void)expanded;
1482  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for CAS");
1483 }
1484 
1485 static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1486  static const RTLIB::Libcall LibcallsXchg[6] = {
1487  RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1488  RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1489  RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1490  static const RTLIB::Libcall LibcallsAdd[6] = {
1491  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1492  RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1493  RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1494  static const RTLIB::Libcall LibcallsSub[6] = {
1495  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1496  RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1497  RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1498  static const RTLIB::Libcall LibcallsAnd[6] = {
1499  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1500  RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1501  RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1502  static const RTLIB::Libcall LibcallsOr[6] = {
1503  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1504  RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1505  RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1506  static const RTLIB::Libcall LibcallsXor[6] = {
1507  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1508  RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1509  RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1510  static const RTLIB::Libcall LibcallsNand[6] = {
1511  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1512  RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1513  RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1514 
1515  switch (Op) {
1516  case AtomicRMWInst::BAD_BINOP:
1517  llvm_unreachable("Should not have BAD_BINOP.");
1518  case AtomicRMWInst::Xchg:
1519  return makeArrayRef(LibcallsXchg);
1520  case AtomicRMWInst::Add:
1521  return makeArrayRef(LibcallsAdd);
1522  case AtomicRMWInst::Sub:
1523  return makeArrayRef(LibcallsSub);
1524  case AtomicRMWInst::And:
1525  return makeArrayRef(LibcallsAnd);
1526  case AtomicRMWInst::Or:
1527  return makeArrayRef(LibcallsOr);
1528  case AtomicRMWInst::Xor:
1529  return makeArrayRef(LibcallsXor);
1530  case AtomicRMWInst::Nand:
1531  return makeArrayRef(LibcallsNand);
1532  case AtomicRMWInst::Max:
1533  case AtomicRMWInst::Min:
1534  case AtomicRMWInst::UMax:
1535  case AtomicRMWInst::UMin:
1536  // No atomic libcalls are available for max/min/umax/umin.
1537  return {};
1538  }
1539  llvm_unreachable("Unexpected AtomicRMW operation.");
1540 }
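// Example of the lookup above: GetRMWLibcall(AtomicRMWInst::Add) returns
// LibcallsAdd, whose slot 0 is UNKNOWN_LIBCALL, so 'add' has only the sized
// __atomic_fetch_add_{1,2,4,8,16} entry points. Xchg is the one operation
// with a generic __atomic_exchange fallback, while min/max/umin/umax return
// an empty list and are forced onto the cmpxchg-loop path below.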
1541 
1542 void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1543  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1544 
1545  unsigned Size = getAtomicOpSize(I);
1546  unsigned Align = getAtomicOpAlign(I);
1547 
1548  bool Success = false;
1549  if (!Libcalls.empty())
1550  Success = expandAtomicOpToLibcall(
1551  I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr,
1552  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1553 
1554  // The expansion failed: either there were no libcalls at all for
1555  // the operation (min/max), or there were only size-specialized
1556  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1557  // CAS libcall, via a CAS loop, instead.
1558  if (!Success) {
1559  expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr,
1560  Value *Loaded, Value *NewVal,
1561  AtomicOrdering MemOpOrder,
1562  Value *&Success, Value *&NewLoaded) {
1563  // Create the CAS instruction normally...
1564  AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1565  Addr, Loaded, NewVal, MemOpOrder,
1566  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
1567  Success = Builder.CreateExtractValue(Pair, 1, "success");
1568  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1569 
1570  // ...and then expand the CAS into a libcall.
1571  expandAtomicCASToLibcall(Pair);
1572  });
1573  }
1574 }
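// Illustrative sketch (not emitted verbatim): for an operation with no
// fetch_* libcall, e.g. a 4-byte signed 'max', the combined expansion
// behaves like a C loop around __atomic_compare_exchange_4:
//
//   int Old = *Ptr;                                  // initial load
//   int New;
//   do {
//     New = Old > Val ? Old : Val;                   // performAtomicOp
//   } while (!__atomic_compare_exchange_4(Ptr, &Old, New, order, order));
//   // Old now holds the previous value, like the original atomicrmw result.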
1575 
1576 // A helper routine for the above expandAtomic*ToLibcall functions.
1577 //
1578 // 'Libcalls' contains an array of enum values for the particular
1579 // ATOMIC libcalls to be emitted. All of the other arguments besides
1580 // 'I' are extracted from the Instruction subclass by the
1581 // caller. Depending on the particular call, some will be null.
1582 bool AtomicExpand::expandAtomicOpToLibcall(
1583  Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand,
1584  Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1585  AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1586  assert(Libcalls.size() == 6);
1587 
1588  LLVMContext &Ctx = I->getContext();
1589  Module *M = I->getModule();
1590  const DataLayout &DL = M->getDataLayout();
1591  IRBuilder<> Builder(I);
1592  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1593 
1594  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL);
1595  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1596 
1597  unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy);
1598 
1599  // TODO: the "order" argument type is "int", not int32. So
1600  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1601  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1602  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1603  Constant *OrderingVal =
1604  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1605  Constant *Ordering2Val = nullptr;
1606  if (CASExpected) {
1607  assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1608  Ordering2Val =
1609  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1610  }
1611  bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1612 
1613  RTLIB::Libcall RTLibType;
1614  if (UseSizedLibcall) {
1615  switch (Size) {
1616  case 1: RTLibType = Libcalls[1]; break;
1617  case 2: RTLibType = Libcalls[2]; break;
1618  case 4: RTLibType = Libcalls[3]; break;
1619  case 8: RTLibType = Libcalls[4]; break;
1620  case 16: RTLibType = Libcalls[5]; break;
1621  }
1622  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1623  RTLibType = Libcalls[0];
1624  } else {
1625  // Can't use sized function, and there's no generic for this
1626  // operation, so give up.
1627  return false;
1628  }
1629 
1630  // Build up the function call. There's two kinds. First, the sized
1631  // variants. These calls are going to be one of the following (with
1632  // N=1,2,4,8,16):
1633  // iN __atomic_load_N(iN *ptr, int ordering)
1634  // void __atomic_store_N(iN *ptr, iN val, int ordering)
1635  // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1636  // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1637  // int success_order, int failure_order)
1638  //
1639  // Note that these functions can be used for non-integer atomic
1640  // operations, the values just need to be bitcast to integers on the
1641  // way in and out.
1642  //
1643  // And, then, the generic variants. They look like the following:
1644  // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1645  // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1646  // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1647  // int ordering)
1648  // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1649  // void *desired, int success_order,
1650  // int failure_order)
1651  //
1652  // The different signatures are built up depending on the
1653  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1654  // variables.
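  // For instance (illustrative, not verbatim output): a 4-byte atomic load on
  // the sized path becomes roughly
  //   %r = call i32 @__atomic_load_4(i8* %ptr, i32 5)        ; 5 == seq_cst
  // while an access with no usable sized variant (e.g. a 16-byte load where
  // only 8-byte integers are legal, or an under-aligned load) falls back to
  //   call void @__atomic_load(i64 16, i8* %ptr, i8* %ret, i32 5)
  // with %ret pointing at a stack temporary that is reloaded afterwards.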
1655 
1656  AllocaInst *AllocaCASExpected = nullptr;
1657  Value *AllocaCASExpected_i8 = nullptr;
1658  AllocaInst *AllocaValue = nullptr;
1659  Value *AllocaValue_i8 = nullptr;
1660  AllocaInst *AllocaResult = nullptr;
1661  Value *AllocaResult_i8 = nullptr;
1662 
1663  Type *ResultTy;
1664  SmallVector<Value *, 6> Args;
1665  AttributeList Attr;
1666 
1667  // 'size' argument.
1668  if (!UseSizedLibcall) {
1669  // Note, getIntPtrType is assumed equivalent to size_t.
1670  Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1671  }
1672 
1673  // 'ptr' argument.
1674  Value *PtrVal =
1675  Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx));
1676  Args.push_back(PtrVal);
1677 
1678  // 'expected' argument, if present.
1679  if (CASExpected) {
1680  AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1681  AllocaCASExpected->setAlignment(AllocaAlignment);
1682  AllocaCASExpected_i8 =
1683  Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx));
1684  Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
1685  Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1686  Args.push_back(AllocaCASExpected_i8);
1687  }
1688 
1689  // 'val' argument ('desired' for cas), if present.
1690  if (ValueOperand) {
1691  if (UseSizedLibcall) {
1692  Value *IntValue =
1693  Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1694  Args.push_back(IntValue);
1695  } else {
1696  AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1697  AllocaValue->setAlignment(AllocaAlignment);
1698  AllocaValue_i8 =
1699  Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
1700  Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
1701  Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1702  Args.push_back(AllocaValue_i8);
1703  }
1704  }
1705 
1706  // 'ret' argument.
1707  if (!CASExpected && HasResult && !UseSizedLibcall) {
1708  AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1709  AllocaResult->setAlignment(AllocaAlignment);
1710  AllocaResult_i8 =
1711  Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx));
1712  Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
1713  Args.push_back(AllocaResult_i8);
1714  }
1715 
1716  // 'ordering' ('success_order' for cas) argument.
1717  Args.push_back(OrderingVal);
1718 
1719  // 'failure_order' argument, if present.
1720  if (Ordering2Val)
1721  Args.push_back(Ordering2Val);
1722 
1723  // Now, the return type.
1724  if (CASExpected) {
1725  ResultTy = Type::getInt1Ty(Ctx);
1726  Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
1727  } else if (HasResult && UseSizedLibcall)
1728  ResultTy = SizedIntTy;
1729  else
1730  ResultTy = Type::getVoidTy(Ctx);
1731 
1732  // Done with setting up arguments and return types, create the call:
1733  SmallVector<Type *, 6> ArgTys;
1734  for (Value *Arg : Args)
1735  ArgTys.push_back(Arg->getType());
1736  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1737  Constant *LibcallFn =
1738  M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1739  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1740  Call->setAttributes(Attr);
1741  Value *Result = Call;
1742 
1743  // And then, extract the results...
1744  if (ValueOperand && !UseSizedLibcall)
1745  Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);
1746 
1747  if (CASExpected) {
1748  // The final result from the CAS is {load of 'expected' alloca, bool result
1749  // from call}
1750  Type *FinalResultTy = I->getType();
1751  Value *V = UndefValue::get(FinalResultTy);
1752  Value *ExpectedOut =
1753  Builder.CreateAlignedLoad(AllocaCASExpected, AllocaAlignment);
1754  Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
1755  V = Builder.CreateInsertValue(V, ExpectedOut, 0);
1756  V = Builder.CreateInsertValue(V, Result, 1);
1757  I->replaceAllUsesWith(V);
1758  } else if (HasResult) {
1759  Value *V;
1760  if (UseSizedLibcall)
1761  V = Builder.CreateBitOrPointerCast(Result, I->getType());
1762  else {
1763  V = Builder.CreateAlignedLoad(AllocaResult, AllocaAlignment);
1764  Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
1765  }
1766  I->replaceAllUsesWith(V);
1767  }
1768  I->eraseFromParent();
1769  return true;
1770 }
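// End-to-end illustration (not verbatim output): a seq_cst 4-byte cmpxchg
// lowered through this path produces roughly
//   %exp.addr = alloca i32                           ; in the entry block
//   store i32 %expected, i32* %exp.addr
//   %ok = call zeroext i1 @__atomic_compare_exchange_4(i8* %ptr.i8,
//            i8* %exp.i8, i32 %desired, i32 5, i32 5)
//   %old = load i32, i32* %exp.addr
// and the original cmpxchg's users see the pair {%old, %ok} rebuilt with
// insertvalue, matching the {value, success} result of the instruction.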