LLVM  8.0.1
WebAssemblyLateEHPrepare.cpp
Go to the documentation of this file.
1 //=== WebAssemblyLateEHPrepare.cpp - WebAssembly Exception Preparation -===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Does various transformations for exception handling.
12 ///
13 //===----------------------------------------------------------------------===//
14 
16 #include "WebAssembly.h"
17 #include "WebAssemblySubtarget.h"
18 #include "WebAssemblyUtilities.h"
21 #include "llvm/MC/MCAsmInfo.h"
22 using namespace llvm;
23 
24 #define DEBUG_TYPE "wasm-exception-prepare"
25 
26 namespace {
27 class WebAssemblyLateEHPrepare final : public MachineFunctionPass {
28  StringRef getPassName() const override {
29  return "WebAssembly Prepare Exception";
30  }
31 
32  bool runOnMachineFunction(MachineFunction &MF) override;
33 
34  bool removeUnnecessaryUnreachables(MachineFunction &MF);
35  bool replaceFuncletReturns(MachineFunction &MF);
36  bool hoistCatches(MachineFunction &MF);
37  bool addCatchAlls(MachineFunction &MF);
38  bool addRethrows(MachineFunction &MF);
39  bool ensureSingleBBTermPads(MachineFunction &MF);
40  bool mergeTerminatePads(MachineFunction &MF);
41  bool addCatchAllTerminatePads(MachineFunction &MF);
42 
43 public:
44  static char ID; // Pass identification, replacement for typeid
45  WebAssemblyLateEHPrepare() : MachineFunctionPass(ID) {}
46 };
47 } // end anonymous namespace
48 
50 INITIALIZE_PASS(WebAssemblyLateEHPrepare, DEBUG_TYPE,
51  "WebAssembly Late Exception Preparation", false, false)
52 
54  return new WebAssemblyLateEHPrepare();
55 }
56 
57 // Returns the nearest EH pad that dominates this instruction. This does not use
58 // dominator analysis; it just does BFS on its predecessors until arriving at an
59 // EH pad. This assumes valid EH scopes so the first EH pad it arrives in all
60 // possible search paths should be the same.
61 // Returns nullptr in case it does not find any EH pad in the search, or finds
62 // multiple different EH pads.
64  MachineFunction *MF = MI->getParent()->getParent();
67  WL.push_back(MI->getParent());
68  MachineBasicBlock *EHPad = nullptr;
69  while (!WL.empty()) {
70  MachineBasicBlock *MBB = WL.pop_back_val();
71  if (Visited.count(MBB))
72  continue;
73  Visited.insert(MBB);
74  if (MBB->isEHPad()) {
75  if (EHPad && EHPad != MBB)
76  return nullptr;
77  EHPad = MBB;
78  continue;
79  }
80  if (MBB == &MF->front())
81  return nullptr;
82  WL.append(MBB->pred_begin(), MBB->pred_end());
83  }
84  return EHPad;
85 }
86 
87 // Erase the specified BBs if the BB does not have any remaining predecessors,
88 // and also all its dead children.
89 template <typename Container>
90 static void eraseDeadBBsAndChildren(const Container &MBBs) {
91  SmallVector<MachineBasicBlock *, 8> WL(MBBs.begin(), MBBs.end());
92  while (!WL.empty()) {
93  MachineBasicBlock *MBB = WL.pop_back_val();
94  if (!MBB->pred_empty())
95  continue;
97  MBB->succ_end());
98  WL.append(MBB->succ_begin(), MBB->succ_end());
99  for (auto *Succ : Succs)
100  MBB->removeSuccessor(Succ);
101  MBB->eraseFromParent();
102  }
103 }
104 
105 bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
106  LLVM_DEBUG(dbgs() << "********** Late EH Prepare **********\n"
107  "********** Function: "
108  << MF.getName() << '\n');
109 
112  return false;
113 
114  bool Changed = false;
115  Changed |= removeUnnecessaryUnreachables(MF);
116  Changed |= addRethrows(MF);
117  if (!MF.getFunction().hasPersonalityFn())
118  return Changed;
119  Changed |= replaceFuncletReturns(MF);
120  Changed |= hoistCatches(MF);
121  Changed |= addCatchAlls(MF);
122  Changed |= ensureSingleBBTermPads(MF);
123  Changed |= mergeTerminatePads(MF);
124  Changed |= addCatchAllTerminatePads(MF);
125  return Changed;
126 }
127 
128 bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
129  MachineFunction &MF) {
130  bool Changed = false;
131  for (auto &MBB : MF) {
132  for (auto &MI : MBB) {
133  if (!WebAssembly::isThrow(MI))
134  continue;
135  Changed = true;
136 
137  // The instruction after the throw should be an unreachable or a branch to
138  // another BB that should eventually lead to an unreachable. Delete it
139  // because throw itself is a terminator, and also delete successors if
140  // any.
141  MBB.erase(std::next(MachineBasicBlock::iterator(MI)), MBB.end());
142  SmallVector<MachineBasicBlock *, 8> Succs(MBB.succ_begin(),
143  MBB.succ_end());
144  for (auto *Succ : Succs)
145  MBB.removeSuccessor(Succ);
147  }
148  }
149 
150  return Changed;
151 }
152 
153 bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
154  bool Changed = false;
155  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
156  auto *EHInfo = MF.getWasmEHFuncInfo();
157 
158  for (auto &MBB : MF) {
159  auto Pos = MBB.getFirstTerminator();
160  if (Pos == MBB.end())
161  continue;
162  MachineInstr *TI = &*Pos;
163 
164  switch (TI->getOpcode()) {
165  case WebAssembly::CATCHRET: {
166  // Replace a catchret with a branch
167  MachineBasicBlock *TBB = TI->getOperand(0).getMBB();
168  if (!MBB.isLayoutSuccessor(TBB))
169  BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::BR))
170  .addMBB(TBB);
171  TI->eraseFromParent();
172  Changed = true;
173  break;
174  }
176  // Replace a cleanupret with a rethrow
177  if (EHInfo->hasThrowUnwindDest(&MBB))
178  BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW))
179  .addMBB(EHInfo->getThrowUnwindDest(&MBB));
180  else
181  BuildMI(MBB, TI, TI->getDebugLoc(),
182  TII.get(WebAssembly::RETHROW_TO_CALLER));
183 
184  TI->eraseFromParent();
185  Changed = true;
186  break;
187  }
188  }
189  }
190  return Changed;
191 }
192 
193 // Hoist catch instructions to the beginning of their matching EH pad BBs in
194 // case,
195 // (1) catch instruction is not the first instruction in EH pad.
196 // ehpad:
197 // some_other_instruction
198 // ...
199 // %exn = catch 0
200 // (2) catch instruction is in a non-EH pad BB. For example,
201 // ehpad:
202 // br bb0
203 // bb0:
204 // %exn = catch 0
205 bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) {
206  bool Changed = false;
208  for (auto &MBB : MF)
209  for (auto &MI : MBB)
211  Catches.push_back(&MI);
212 
213  for (auto *Catch : Catches) {
215  assert(EHPad && "No matching EH pad for catch");
216  if (EHPad->begin() == Catch)
217  continue;
218  Changed = true;
219  EHPad->insert(EHPad->begin(), Catch->removeFromParent());
220  }
221  return Changed;
222 }
223 
224 // Add catch_all to beginning of cleanup pads.
225 bool WebAssemblyLateEHPrepare::addCatchAlls(MachineFunction &MF) {
226  bool Changed = false;
227  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
228 
229  for (auto &MBB : MF) {
230  if (!MBB.isEHPad())
231  continue;
232  // This runs after hoistCatches(), so we assume that if there is a catch,
233  // that should be the first instruction in an EH pad.
234  if (!WebAssembly::isCatch(*MBB.begin())) {
235  Changed = true;
236  BuildMI(MBB, MBB.begin(), MBB.begin()->getDebugLoc(),
237  TII.get(WebAssembly::CATCH_ALL));
238  }
239  }
240  return Changed;
241 }
242 
243 // Add a 'rethrow' instruction after __cxa_rethrow() call
244 bool WebAssemblyLateEHPrepare::addRethrows(MachineFunction &MF) {
245  bool Changed = false;
246  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
247  auto *EHInfo = MF.getWasmEHFuncInfo();
248 
249  for (auto &MBB : MF)
250  for (auto &MI : MBB) {
251  // Check if it is a call to __cxa_rethrow()
252  if (!MI.isCall())
253  continue;
254  MachineOperand &CalleeOp = MI.getOperand(0);
255  if (!CalleeOp.isGlobal() ||
257  continue;
258 
259  // Now we have __cxa_rethrow() call
260  Changed = true;
261  auto InsertPt = std::next(MachineBasicBlock::iterator(MI));
262  while (InsertPt != MBB.end() && InsertPt->isLabel()) // Skip EH_LABELs
263  ++InsertPt;
264  MachineInstr *Rethrow = nullptr;
265  if (EHInfo->hasThrowUnwindDest(&MBB))
266  Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(),
267  TII.get(WebAssembly::RETHROW))
268  .addMBB(EHInfo->getThrowUnwindDest(&MBB));
269  else
270  Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(),
271  TII.get(WebAssembly::RETHROW_TO_CALLER));
272 
273  // Because __cxa_rethrow does not return, the instruction after the
274  // rethrow should be an unreachable or a branch to another BB that should
275  // eventually lead to an unreachable. Delete it because rethrow itself is
276  // a terminator, and also delete non-EH pad successors if any.
277  MBB.erase(std::next(MachineBasicBlock::iterator(Rethrow)), MBB.end());
278  SmallVector<MachineBasicBlock *, 8> NonPadSuccessors;
279  for (auto *Succ : MBB.successors())
280  if (!Succ->isEHPad())
281  NonPadSuccessors.push_back(Succ);
282  for (auto *Succ : NonPadSuccessors)
283  MBB.removeSuccessor(Succ);
284  eraseDeadBBsAndChildren(NonPadSuccessors);
285  }
286  return Changed;
287 }
288 
289 // Terminate pads are an single-BB EH pad in the form of
290 // termpad:
291 // %exn = catch 0
292 // call @__clang_call_terminate(%exn)
293 // unreachable
294 // (There can be local.set and local.gets before the call if we didn't run
295 // RegStackify)
296 // But code transformations can change or add more control flow, so the call to
297 // __clang_call_terminate() function may not be in the original EH pad anymore.
298 // This ensures every terminate pad is a single BB in the form illustrated
299 // above.
300 bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
301  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
302 
303  // Find calls to __clang_call_terminate()
304  SmallVector<MachineInstr *, 8> ClangCallTerminateCalls;
305  for (auto &MBB : MF)
306  for (auto &MI : MBB)
307  if (MI.isCall()) {
308  const MachineOperand &CalleeOp = MI.getOperand(0);
309  if (CalleeOp.isGlobal() && CalleeOp.getGlobal()->getName() ==
311  ClangCallTerminateCalls.push_back(&MI);
312  }
313 
314  bool Changed = false;
315  for (auto *Call : ClangCallTerminateCalls) {
316  MachineBasicBlock *EHPad = getMatchingEHPad(Call);
317  assert(EHPad && "No matching EH pad for catch");
318 
319  // If it is already the form we want, skip it
320  if (Call->getParent() == EHPad &&
321  Call->getNextNode()->getOpcode() == WebAssembly::UNREACHABLE)
322  continue;
323 
324  // In case the __clang_call_terminate() call is not in its matching EH pad,
325  // move the call to the end of EH pad and add an unreachable instruction
326  // after that. Delete all successors and their children if any, because here
327  // the program terminates.
328  Changed = true;
329  MachineInstr *Catch = &*EHPad->begin();
330  // This runs after hoistCatches(), so catch instruction should be at the top
331  assert(WebAssembly::isCatch(*Catch));
332  // Takes the result register of the catch instruction as argument. There may
333  // have been some other local.set/local.gets in between, but at this point
334  // we don't care.
335  Call->getOperand(1).setReg(Catch->getOperand(0).getReg());
336  auto InsertPos = std::next(MachineBasicBlock::iterator(Catch));
337  EHPad->insert(InsertPos, Call->removeFromParent());
338  BuildMI(*EHPad, InsertPos, Call->getDebugLoc(),
339  TII.get(WebAssembly::UNREACHABLE));
340  EHPad->erase(InsertPos, EHPad->end());
342  EHPad->succ_end());
343  for (auto *Succ : Succs)
344  EHPad->removeSuccessor(Succ);
346  }
347  return Changed;
348 }
349 
350 // In case there are multiple terminate pads, merge them into one for code size.
351 // This runs after ensureSingleBBTermPads() and assumes every terminate pad is a
352 // single BB.
353 // In principle this violates EH scope relationship because it can merge
354 // multiple inner EH scopes, each of which is in different outer EH scope. But
355 // getEHScopeMembership() function will not be called after this, so it is fine.
356 bool WebAssemblyLateEHPrepare::mergeTerminatePads(MachineFunction &MF) {
358  for (auto &MBB : MF)
360  TermPads.push_back(&MBB);
361  if (TermPads.empty())
362  return false;
363 
364  MachineBasicBlock *UniqueTermPad = TermPads.front();
365  for (auto *TermPad :
366  llvm::make_range(std::next(TermPads.begin()), TermPads.end())) {
367  SmallVector<MachineBasicBlock *, 2> Preds(TermPad->pred_begin(),
368  TermPad->pred_end());
369  for (auto *Pred : Preds)
370  Pred->replaceSuccessor(TermPad, UniqueTermPad);
371  TermPad->eraseFromParent();
372  }
373  return true;
374 }
375 
376 // Terminate pads are cleanup pads, so they should start with a 'catch_all'
377 // instruction. But in the Itanium model, when we have a C++ exception object,
378 // we pass them to __clang_call_terminate function, which calls __cxa_end_catch
379 // with the passed exception pointer and then std::terminate. This is the reason
380 // that terminate pads are generated with not a catch_all but a catch
381 // instruction in clang and earlier llvm passes. Here we append a terminate pad
382 // with a catch_all after each existing terminate pad so we can also catch
383 // foreign exceptions. For every terminate pad:
384 // %exn = catch 0
385 // call @__clang_call_terminate(%exn)
386 // unreachable
387 // We append this BB right after that:
388 // catch_all
389 // call @std::terminate()
390 // unreachable
391 bool WebAssemblyLateEHPrepare::addCatchAllTerminatePads(MachineFunction &MF) {
392  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
394  for (auto &MBB : MF)
396  TermPads.push_back(&MBB);
397  if (TermPads.empty())
398  return false;
399 
401  MF.getFunction().getParent()->getFunction(WebAssembly::StdTerminateFn);
402  assert(StdTerminateFn && "There is no std::terminate() function");
403  for (auto *CatchTermPad : TermPads) {
404  DebugLoc DL = CatchTermPad->findDebugLoc(CatchTermPad->begin());
405  auto *CatchAllTermPad = MF.CreateMachineBasicBlock();
406  MF.insert(std::next(MachineFunction::iterator(CatchTermPad)),
407  CatchAllTermPad);
408  CatchAllTermPad->setIsEHPad();
409  BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CATCH_ALL));
410  BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CALL_VOID))
411  .addGlobalAddress(StdTerminateFn);
412  BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::UNREACHABLE));
413 
414  // Actually this CatchAllTermPad (new terminate pad with a catch_all) is not
415  // a successor of an existing terminate pad. CatchAllTermPad should have all
416  // predecessors CatchTermPad has instead. This is a hack to force
417  // CatchAllTermPad be always sorted right after CatchTermPad; the correct
418  // predecessor-successor relationships will be restored in CFGStackify pass.
419  CatchTermPad->addSuccessor(CatchAllTermPad);
420  }
421  return true;
422 }
#define DEBUG_TYPE
MachineBasicBlock * getMBB() const
This class represents lattice values for constants.
Definition: AllocatorList.h:24
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
unsigned getReg() const
getReg - Returns the register number.
A debug info location.
Definition: DebugLoc.h:34
This file contains the entry points for global functions defined in the LLVM WebAssembly back-end...
CLEANUPRET - Represents a return from a cleanup block funclet.
Definition: ISDOpcodes.h:690
void eraseFromParent()
This method unlinks &#39;this&#39; from the containing function and deletes it.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
static void eraseDeadBBsAndChildren(const Container &MBBs)
const char *const StdTerminateFn
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:702
bool isThrow(const MachineInstr &MI)
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Windows Exception Handling.
This file contains the declaration of the WebAssembly-specific utility functions. ...
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:629
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
const GlobalValue * getGlobal() const
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
This file provides WebAssembly-specific target descriptions.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
const char *const CxaRethrowFn
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
const MachineBasicBlock & front() const
INITIALIZE_PASS(WebAssemblyLateEHPrepare, DEBUG_TYPE, "WebAssembly Late Exception Preparation", false, false) FunctionPass *llvm
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This file declares the WebAssembly-specific subclass of TargetSubtarget.
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
const char *const ClangCallTerminateFn
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
bool isCatchTerminatePad(const MachineBasicBlock &MBB)
Returns if the given BB is a single BB terminate pad which starts with a &#39;catch&#39; instruction.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
static MachineBasicBlock * getMatchingEHPad(MachineInstr *MI)
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:394
CATCHRET - Represents a return from a catch block funclet.
Definition: ISDOpcodes.h:686
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
const WasmEHFuncInfo * getWasmEHFuncInfo() const
getWasmEHFuncInfo - Return information about how the current function uses Wasm exception handling...
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
bool isEHPad() const
Returns true if the block is a landing pad.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
FunctionPass * createWebAssemblyLateEHPrepare()
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
bool isCatch(const MachineInstr &MI)
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
ExceptionHandling getExceptionHandlingType() const
Definition: MCAsmInfo.h:570
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
#define LLVM_DEBUG(X)
Definition: Debug.h:123
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414