LLVM  8.0.1
SystemZTDC.cpp
Go to the documentation of this file.
1 //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass looks for instructions that can be replaced by a Test Data Class
11 // instruction, and replaces them when profitable.
12 //
13 // Roughly, the following rules are recognized:
14 //
15 // 1: fcmp pred X, 0 -> tdc X, mask
16 // 2: fcmp pred X, +-inf -> tdc X, mask
17 // 3: fcmp pred X, +-minnorm -> tdc X, mask
18 // 4: tdc (fabs X), mask -> tdc X, newmask
19 // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
20 // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
21 // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
22 // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
23 // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
24 // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
25 //
26 // The pass works in 4 steps:
27 //
28 // 1. All fcmp and icmp instructions in a function are checked for a match
29 // with rules 1-3 and 5-7. Their TDC equivalents are stored in
30 // the ConvertedInsts mapping. If the operand of a fcmp instruction is
31 // a fabs, it's also folded according to rule 4.
32 // 2. All and/or/xor i1 instructions whose both operands have been already
33 // mapped are mapped according to rules 8-10. LogicOpsWorklist is used
34 // as a queue of instructions to check.
35 // 3. All mapped instructions that are considered worthy of conversion (ie.
36 // replacing them will actually simplify the final code) are replaced
37 // with a call to the s390.tdc intrinsic.
38 // 4. All intermediate results of replaced instructions are removed if unused.
39 //
40 // Instructions that match rules 1-3 are considered unworthy of conversion
41 // on their own (since a comparison instruction is superior), but are mapped
42 // in the hopes of folding the result using rules 4 and 8-10 (likely removing
43 // the original comparison in the process).
44 //
45 //===----------------------------------------------------------------------===//
46 
47 #include "SystemZ.h"
48 #include "llvm/ADT/MapVector.h"
49 #include "llvm/IR/Constants.h"
50 #include "llvm/IR/IRBuilder.h"
51 #include "llvm/IR/InstIterator.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/Module.h"
56 #include <deque>
57 #include <set>
58 
59 using namespace llvm;
60 
61 namespace llvm {
63 }
64 
65 namespace {
66 
67 class SystemZTDCPass : public FunctionPass {
68 public:
69  static char ID;
70  SystemZTDCPass() : FunctionPass(ID) {
72  }
73 
74  bool runOnFunction(Function &F) override;
75 private:
76  // Maps seen instructions that can be mapped to a TDC, values are
77  // (TDC operand, TDC mask, worthy flag) triples.
79  // The queue of and/or/xor i1 instructions to be potentially folded.
80  std::vector<BinaryOperator *> LogicOpsWorklist;
81  // Instructions matched while folding, to be removed at the end if unused.
82  std::set<Instruction *> PossibleJunk;
83 
84  // Tries to convert a fcmp instruction.
85  void convertFCmp(CmpInst &I);
86 
87  // Tries to convert an icmp instruction.
88  void convertICmp(CmpInst &I);
89 
90  // Tries to convert an i1 and/or/xor instruction, whose both operands
91  // have been already converted.
92  void convertLogicOp(BinaryOperator &I);
93 
94  // Marks an instruction as converted - adds it to ConvertedInsts and adds
95  // any and/or/xor i1 users to the queue.
96  void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
97  ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
98  auto &M = *I->getFunction()->getParent();
99  auto &Ctx = M.getContext();
100  for (auto *U : I->users()) {
101  auto *LI = dyn_cast<BinaryOperator>(U);
102  if (LI && LI->getType() == Type::getInt1Ty(Ctx) &&
103  (LI->getOpcode() == Instruction::And ||
104  LI->getOpcode() == Instruction::Or ||
105  LI->getOpcode() == Instruction::Xor)) {
106  LogicOpsWorklist.push_back(LI);
107  }
108  }
109  }
110 };
111 
112 } // end anonymous namespace
113 
114 char SystemZTDCPass::ID = 0;
115 INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
116  "SystemZ Test Data Class optimization", false, false)
117 
119  return new SystemZTDCPass();
120 }
121 
122 void SystemZTDCPass::convertFCmp(CmpInst &I) {
123  Value *Op0 = I.getOperand(0);
124  auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
125  auto Pred = I.getPredicate();
126  // Only comparisons with consts are interesting.
127  if (!Const)
128  return;
129  // Compute the smallest normal number (and its negation).
130  auto &Sem = Op0->getType()->getFltSemantics();
131  APFloat Smallest = APFloat::getSmallestNormalized(Sem);
132  APFloat NegSmallest = Smallest;
133  NegSmallest.changeSign();
134  // Check if Const is one of our recognized consts.
135  int WhichConst;
136  if (Const->isZero()) {
137  // All comparisons with 0 can be converted.
138  WhichConst = 0;
139  } else if (Const->isInfinity()) {
140  // Likewise for infinities.
141  WhichConst = Const->isNegative() ? 2 : 1;
142  } else if (Const->isExactlyValue(Smallest)) {
143  // For Smallest, we cannot do EQ separately from GT.
144  if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
145  (Pred & CmpInst::FCMP_OGE) != 0)
146  return;
147  WhichConst = 3;
148  } else if (Const->isExactlyValue(NegSmallest)) {
149  // Likewise for NegSmallest, we cannot do EQ separately from LT.
150  if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
151  (Pred & CmpInst::FCMP_OLE) != 0)
152  return;
153  WhichConst = 4;
154  } else {
155  // Not one of our special constants.
156  return;
157  }
158  // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
159  static const int Masks[][4] = {
160  { // 0
161  SystemZ::TDCMASK_ZERO, // eq
164  SystemZ::TDCMASK_NAN, // un
165  },
166  { // inf
168  0, // gt
173  SystemZ::TDCMASK_NAN, // un
174  },
175  { // -inf
181  0, // lt
182  SystemZ::TDCMASK_NAN, // un
183  },
184  { // minnorm
185  0, // eq (unsupported)
187  SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge)
191  SystemZ::TDCMASK_NAN, // un
192  },
193  { // -minnorm
194  0, // eq (unsupported)
199  SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le)
200  SystemZ::TDCMASK_NAN, // un
201  }
202  };
203  // Construct the mask as a combination of the partial masks.
204  int Mask = 0;
205  if (Pred & CmpInst::FCMP_OEQ)
206  Mask |= Masks[WhichConst][0];
207  if (Pred & CmpInst::FCMP_OGT)
208  Mask |= Masks[WhichConst][1];
209  if (Pred & CmpInst::FCMP_OLT)
210  Mask |= Masks[WhichConst][2];
211  if (Pred & CmpInst::FCMP_UNO)
212  Mask |= Masks[WhichConst][3];
213  // A lone fcmp is unworthy of tdc conversion on its own, but may become
214  // worthy if combined with fabs.
215  bool Worthy = false;
216  if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
217  Function *F = CI->getCalledFunction();
218  if (F && F->getIntrinsicID() == Intrinsic::fabs) {
219  // Fold with fabs - adjust the mask appropriately.
220  Mask &= SystemZ::TDCMASK_PLUS;
221  Mask |= Mask >> 1;
222  Op0 = CI->getArgOperand(0);
223  // A combination of fcmp with fabs is a win, unless the constant
224  // involved is 0 (which is handled by later passes).
225  Worthy = WhichConst != 0;
226  PossibleJunk.insert(CI);
227  }
228  }
229  converted(&I, Op0, Mask, Worthy);
230 }
231 
232 void SystemZTDCPass::convertICmp(CmpInst &I) {
233  Value *Op0 = I.getOperand(0);
234  auto *Const = dyn_cast<ConstantInt>(I.getOperand(1));
235  auto Pred = I.getPredicate();
236  // All our icmp rules involve comparisons with consts.
237  if (!Const)
238  return;
239  if (auto *Cast = dyn_cast<BitCastInst>(Op0)) {
240  // Check for icmp+bitcast used for signbit.
241  if (!Cast->getSrcTy()->isFloatTy() &&
242  !Cast->getSrcTy()->isDoubleTy() &&
243  !Cast->getSrcTy()->isFP128Ty())
244  return;
245  Value *V = Cast->getOperand(0);
246  int Mask;
247  if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
248  // icmp slt (bitcast X), 0 - set if sign bit true
249  Mask = SystemZ::TDCMASK_MINUS;
250  } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
251  // icmp sgt (bitcast X), -1 - set if sign bit false
252  Mask = SystemZ::TDCMASK_PLUS;
253  } else {
254  // Not a sign bit check.
255  return;
256  }
257  PossibleJunk.insert(Cast);
258  converted(&I, V, Mask, true);
259  } else if (auto *CI = dyn_cast<CallInst>(Op0)) {
260  // Check if this is a pre-existing call of our tdc intrinsic.
261  Function *F = CI->getCalledFunction();
262  if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
263  return;
264  if (!Const->isZero())
265  return;
266  Value *V = CI->getArgOperand(0);
267  auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
268  // Bail if the mask is not a constant.
269  if (!MaskC)
270  return;
271  int Mask = MaskC->getZExtValue();
272  Mask &= SystemZ::TDCMASK_ALL;
273  if (Pred == CmpInst::ICMP_NE) {
274  // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
275  } else if (Pred == CmpInst::ICMP_EQ) {
276  // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
277  Mask ^= SystemZ::TDCMASK_ALL;
278  } else {
279  // An unknown comparison - ignore.
280  return;
281  }
282  PossibleJunk.insert(CI);
283  converted(&I, V, Mask, false);
284  }
285 }
286 
287 void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
288  Value *Op0, *Op1;
289  int Mask0, Mask1;
290  bool Worthy0, Worthy1;
291  std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
292  std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))];
293  if (Op0 != Op1)
294  return;
295  int Mask;
296  switch (I.getOpcode()) {
297  case Instruction::And:
298  Mask = Mask0 & Mask1;
299  break;
300  case Instruction::Or:
301  Mask = Mask0 | Mask1;
302  break;
303  case Instruction::Xor:
304  Mask = Mask0 ^ Mask1;
305  break;
306  default:
307  llvm_unreachable("Unknown op in convertLogicOp");
308  }
309  converted(&I, Op0, Mask, true);
310 }
311 
313  ConvertedInsts.clear();
314  LogicOpsWorklist.clear();
315  PossibleJunk.clear();
316 
317  // Look for icmp+fcmp instructions.
318  for (auto &I : instructions(F)) {
319  if (I.getOpcode() == Instruction::FCmp)
320  convertFCmp(cast<CmpInst>(I));
321  else if (I.getOpcode() == Instruction::ICmp)
322  convertICmp(cast<CmpInst>(I));
323  }
324 
325  // If none found, bail already.
326  if (ConvertedInsts.empty())
327  return false;
328 
329  // Process the queue of logic instructions.
330  while (!LogicOpsWorklist.empty()) {
331  BinaryOperator *Op = LogicOpsWorklist.back();
332  LogicOpsWorklist.pop_back();
333  // If both operands mapped, and the instruction itself not yet mapped,
334  // convert it.
335  if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) &&
336  ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) &&
337  !ConvertedInsts.count(Op))
338  convertLogicOp(*Op);
339  }
340 
341  // Time to actually replace the instructions. Do it in the reverse order
342  // of finding them, since there's a good chance the earlier ones will be
343  // unused (due to being folded into later ones).
344  Module &M = *F.getParent();
345  auto &Ctx = M.getContext();
346  Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
347  bool MadeChange = false;
348  for (auto &It : reverse(ConvertedInsts)) {
349  Instruction *I = It.first;
350  Value *V;
351  int Mask;
352  bool Worthy;
353  std::tie(V, Mask, Worthy) = It.second;
354  if (!I->user_empty()) {
355  // If used and unworthy of conversion, skip it.
356  if (!Worthy)
357  continue;
358  // Call the intrinsic, compare result with 0.
360  V->getType());
361  IRBuilder<> IRB(I);
362  Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
363  Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal});
364  Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
365  I->replaceAllUsesWith(ICmp);
366  }
367  // If unused, or used and converted, remove it.
368  I->eraseFromParent();
369  MadeChange = true;
370  }
371 
372  if (!MadeChange)
373  return false;
374 
375  // We've actually done something - now clear misc accumulated junk (fabs,
376  // bitcast).
377  for (auto *I : PossibleJunk)
378  if (I->user_empty())
379  I->eraseFromParent();
380 
381  return true;
382 }
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:68
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:636
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:173
void clear()
Definition: MapVector.h:89
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
This class represents lattice values for constants.
Definition: AllocatorList.h:24
BinaryOps getOpcode() const
Definition: InstrTypes.h:316
const unsigned TDCMASK_ZERO
Definition: SystemZ.h:132
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
bool user_empty() const
Definition: Value.h:364
void initializeSystemZTDCPassPass(PassRegistry &)
const unsigned TDCMASK_NEGATIVE
Definition: SystemZ.h:136
This class represents a function call, abstracting a target machine&#39;s calling convention.
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:652
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:38
F(f)
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
void changeSign()
Definition: APFloat.h:1050
const unsigned TDCMASK_POSITIVE
Definition: SystemZ.h:133
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:656
FunctionPass * createSystemZTDCPass()
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:653
bool empty() const
Definition: MapVector.h:80
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:267
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1020
Value * getOperand(unsigned i) const
Definition: User.h:170
static bool runOnFunction(Function &F, bool PostInlining)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:149
const unsigned TDCMASK_NORMAL_PLUS
Definition: SystemZ.h:121
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition: SystemZ.h:124
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:60
const unsigned TDCMASK_NAN
Definition: SystemZ.h:139
const unsigned TDCMASK_INFINITY_PLUS
Definition: SystemZ.h:125
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
signed greater than
Definition: InstrTypes.h:673
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:650
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
const unsigned TDCMASK_ALL
Definition: SystemZ.h:151
Module.h This file contains the declarations for the Module class.
size_type count(const KeyT &Key) const
Definition: MapVector.h:143
signed less than
Definition: InstrTypes.h:675
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:622
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:194
iterator_range< user_iterator > users()
Definition: Value.h:400
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:721
INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc", "SystemZ Test Data Class optimization", false, false) FunctionPass *llvm
Definition: SystemZTDC.cpp:115
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.h:924
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:649
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
LLVM Value Representation.
Definition: Value.h:73
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:81
const unsigned TDCMASK_INFINITY_MINUS
Definition: SystemZ.h:126
const unsigned TDCMASK_NORMAL_MINUS
Definition: SystemZ.h:122
inst_range instructions(Function *F)
Definition: InstIterator.h:134
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:39
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:651
const unsigned TDCMASK_PLUS
Definition: SystemZ.h:143
const unsigned TDCMASK_MINUS
Definition: SystemZ.h:147
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition: SystemZ.h:123
const fltSemantics & getFltSemantics() const
Definition: Type.h:169